diff --git a/cli/testdata/coder_list_--output_json.golden b/cli/testdata/coder_list_--output_json.golden index b1e97296493c0..49e51d408285c 100644 --- a/cli/testdata/coder_list_--output_json.golden +++ b/cli/testdata/coder_list_--output_json.golden @@ -52,6 +52,10 @@ "ttl_ms": 28800000, "last_used_at": "[timestamp]", "deleting_at": null, - "locked_at": null + "locked_at": null, + "health": { + "healthy": true, + "failing_agents": [] + } } ] diff --git a/coderd/apidoc/docs.go b/coderd/apidoc/docs.go index 298255dddcecf..2016407b6b151 100644 --- a/coderd/apidoc/docs.go +++ b/coderd/apidoc/docs.go @@ -9592,6 +9592,14 @@ const docTemplate = `{ "type": "string", "format": "date-time" }, + "health": { + "description": "Health shows the health of the workspace and information about\nwhat is causing an unhealthy status.", + "allOf": [ + { + "$ref": "#/definitions/codersdk.WorkspaceHealth" + } + ] + }, "id": { "type": "string", "format": "uuid" @@ -9689,6 +9697,14 @@ const docTemplate = `{ "type": "string", "format": "date-time" }, + "health": { + "description": "Health reports the health of the agent.", + "allOf": [ + { + "$ref": "#/definitions/codersdk.WorkspaceAgentHealth" + } + ] + }, "id": { "type": "string", "format": "uuid" @@ -9783,6 +9799,21 @@ const docTemplate = `{ } } }, + "codersdk.WorkspaceAgentHealth": { + "type": "object", + "properties": { + "healthy": { + "description": "Healthy is true if the agent is healthy.", + "type": "boolean", + "example": false + }, + "reason": { + "description": "Reason is a human-readable explanation of the agent's health. It is empty if Healthy is true.", + "type": "string", + "example": "agent has lost connection" + } + } + }, "codersdk.WorkspaceAgentLifecycle": { "type": "string", "enum": [ @@ -10149,6 +10180,24 @@ const docTemplate = `{ } } }, + "codersdk.WorkspaceHealth": { + "type": "object", + "properties": { + "failing_agents": { + "description": "FailingAgents lists the IDs of the agents that are failing, if any.", + "type": "array", + "items": { + "type": "string", + "format": "uuid" + } + }, + "healthy": { + "description": "Healthy is true if the workspace is healthy.", + "type": "boolean", + "example": false + } + } + }, "codersdk.WorkspaceProxy": { "type": "object", "properties": { diff --git a/coderd/apidoc/swagger.json b/coderd/apidoc/swagger.json index d05a01871db27..5d7a4b946d5c2 100644 --- a/coderd/apidoc/swagger.json +++ b/coderd/apidoc/swagger.json @@ -8659,6 +8659,14 @@ "type": "string", "format": "date-time" }, + "health": { + "description": "Health shows the health of the workspace and information about\nwhat is causing an unhealthy status.", + "allOf": [ + { + "$ref": "#/definitions/codersdk.WorkspaceHealth" + } + ] + }, "id": { "type": "string", "format": "uuid" @@ -8756,6 +8764,14 @@ "type": "string", "format": "date-time" }, + "health": { + "description": "Health reports the health of the agent.", + "allOf": [ + { + "$ref": "#/definitions/codersdk.WorkspaceAgentHealth" + } + ] + }, "id": { "type": "string", "format": "uuid" @@ -8850,6 +8866,21 @@ } } }, + "codersdk.WorkspaceAgentHealth": { + "type": "object", + "properties": { + "healthy": { + "description": "Healthy is true if the agent is healthy.", + "type": "boolean", + "example": false + }, + "reason": { + "description": "Reason is a human-readable explanation of the agent's health. It is empty if Healthy is true.", + "type": "string", + "example": "agent has lost connection" + } + } + }, "codersdk.WorkspaceAgentLifecycle": { "type": "string", "enum": [ @@ -9187,6 +9218,24 @@ } } }, + "codersdk.WorkspaceHealth": { + "type": "object", + "properties": { + "failing_agents": { + "description": "FailingAgents lists the IDs of the agents that are failing, if any.", + "type": "array", + "items": { + "type": "string", + "format": "uuid" + } + }, + "healthy": { + "description": "Healthy is true if the workspace is healthy.", + "type": "boolean", + "example": false + } + } + }, "codersdk.WorkspaceProxy": { "type": "object", "properties": { diff --git a/coderd/workspaceagents.go b/coderd/workspaceagents.go index 4ae04cba22b72..bfe61b4a180df 100644 --- a/coderd/workspaceagents.go +++ b/coderd/workspaceagents.go @@ -1262,6 +1262,24 @@ func convertWorkspaceAgent(derpMap *tailcfg.DERPMap, coordinator tailnet.Coordin workspaceAgent.ReadyAt = &dbAgent.ReadyAt.Time } + switch { + case workspaceAgent.Status != codersdk.WorkspaceAgentConnected && workspaceAgent.LifecycleState == codersdk.WorkspaceAgentLifecycleOff: + workspaceAgent.Health.Reason = "agent is not running" + case workspaceAgent.Status == codersdk.WorkspaceAgentTimeout: + workspaceAgent.Health.Reason = "agent is taking too long to connect" + case workspaceAgent.Status == codersdk.WorkspaceAgentDisconnected: + workspaceAgent.Health.Reason = "agent has lost connection" + // Note: We could also handle codersdk.WorkspaceAgentLifecycleStartTimeout + // here, but it's more of a soft issue, so we don't want to mark the agent + // as unhealthy. + case workspaceAgent.LifecycleState == codersdk.WorkspaceAgentLifecycleStartError: + workspaceAgent.Health.Reason = "agent startup script exited with an error" + case workspaceAgent.LifecycleState.ShuttingDown(): + workspaceAgent.Health.Reason = "agent is shutting down" + default: + workspaceAgent.Health.Healthy = true + } + return workspaceAgent, nil } diff --git a/coderd/workspaceagents_test.go b/coderd/workspaceagents_test.go index 719789e33717e..95d75d16810c8 100644 --- a/coderd/workspaceagents_test.go +++ b/coderd/workspaceagents_test.go @@ -72,6 +72,7 @@ func TestWorkspaceAgent(t *testing.T) { require.Equal(t, tmpDir, workspace.LatestBuild.Resources[0].Agents[0].Directory) _, err = client.WorkspaceAgent(ctx, workspace.LatestBuild.Resources[0].Agents[0].ID) require.NoError(t, err) + require.True(t, workspace.LatestBuild.Resources[0].Agents[0].Health.Healthy) }) t.Run("HasFallbackTroubleshootingURL", func(t *testing.T) { t.Parallel() @@ -167,6 +168,8 @@ func TestWorkspaceAgent(t *testing.T) { }, testutil.IntervalMedium, "agent status timeout") require.Equal(t, wantTroubleshootingURL, workspace.LatestBuild.Resources[0].Agents[0].TroubleshootingURL) + require.False(t, workspace.LatestBuild.Resources[0].Agents[0].Health.Healthy) + require.NotEmpty(t, workspace.LatestBuild.Resources[0].Agents[0].Health.Reason) }) } diff --git a/coderd/workspaces.go b/coderd/workspaces.go index 0910a1d62f0c0..8324d0afae0b2 100644 --- a/coderd/workspaces.go +++ b/coderd/workspaces.go @@ -1110,6 +1110,15 @@ func convertWorkspace( lockedAt = &workspace.LockedAt.Time } + failingAgents := []uuid.UUID{} + for _, resource := range workspaceBuild.Resources { + for _, agent := range resource.Agents { + if !agent.Health.Healthy { + failingAgents = append(failingAgents, agent.ID) + } + } + } + var ( ttlMillis = convertWorkspaceTTLMillis(workspace.Ttl) deletingAt = calculateDeletingAt(workspace, template, workspaceBuild) @@ -1135,6 +1144,10 @@ func convertWorkspace( LastUsedAt: workspace.LastUsedAt, DeletingAt: deletingAt, LockedAt: lockedAt, + Health: codersdk.WorkspaceHealth{ + Healthy: len(failingAgents) == 0, + FailingAgents: failingAgents, + }, } } diff --git a/coderd/workspaces_test.go b/coderd/workspaces_test.go index fdaed96137435..3204e76b5261e 100644 --- a/coderd/workspaces_test.go +++ b/coderd/workspaces_test.go @@ -164,6 +164,148 @@ func TestWorkspace(t *testing.T) { assert.Equal(t, templateDisplayName, ws.TemplateDisplayName) assert.Equal(t, templateAllowUserCancelWorkspaceJobs, ws.TemplateAllowUserCancelWorkspaceJobs) }) + + t.Run("Health", func(t *testing.T) { + t.Parallel() + + t.Run("Healthy", func(t *testing.T) { + t.Parallel() + client := coderdtest.New(t, &coderdtest.Options{IncludeProvisionerDaemon: true}) + user := coderdtest.CreateFirstUser(t, client) + version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ + Parse: echo.ParseComplete, + ProvisionApply: []*proto.Provision_Response{{ + Type: &proto.Provision_Response_Complete{ + Complete: &proto.Provision_Complete{ + Resources: []*proto.Resource{{ + Name: "some", + Type: "example", + Agents: []*proto.Agent{{ + Id: uuid.NewString(), + Auth: &proto.Agent_Token{}, + }}, + }}, + }, + }, + }}, + }) + coderdtest.AwaitTemplateVersionJob(t, client, version.ID) + template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID) + workspace := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID) + coderdtest.AwaitWorkspaceBuildJob(t, client, workspace.LatestBuild.ID) + + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) + defer cancel() + + workspace, err := client.Workspace(ctx, workspace.ID) + require.NoError(t, err) + + agent := workspace.LatestBuild.Resources[0].Agents[0] + + assert.True(t, workspace.Health.Healthy) + assert.Equal(t, []uuid.UUID{}, workspace.Health.FailingAgents) + assert.True(t, agent.Health.Healthy) + assert.Empty(t, agent.Health.Reason) + }) + + t.Run("Unhealthy", func(t *testing.T) { + t.Parallel() + client := coderdtest.New(t, &coderdtest.Options{IncludeProvisionerDaemon: true}) + user := coderdtest.CreateFirstUser(t, client) + version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ + Parse: echo.ParseComplete, + ProvisionApply: []*proto.Provision_Response{{ + Type: &proto.Provision_Response_Complete{ + Complete: &proto.Provision_Complete{ + Resources: []*proto.Resource{{ + Name: "some", + Type: "example", + Agents: []*proto.Agent{{ + Id: uuid.NewString(), + Auth: &proto.Agent_Token{}, + ConnectionTimeoutSeconds: 1, + }}, + }}, + }, + }, + }}, + }) + coderdtest.AwaitTemplateVersionJob(t, client, version.ID) + template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID) + workspace := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID) + coderdtest.AwaitWorkspaceBuildJob(t, client, workspace.LatestBuild.ID) + + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) + defer cancel() + + var err error + testutil.Eventually(ctx, t, func(ctx context.Context) bool { + workspace, err = client.Workspace(ctx, workspace.ID) + return assert.NoError(t, err) && !workspace.Health.Healthy + }, testutil.IntervalMedium) + + agent := workspace.LatestBuild.Resources[0].Agents[0] + + assert.False(t, workspace.Health.Healthy) + assert.Equal(t, []uuid.UUID{agent.ID}, workspace.Health.FailingAgents) + assert.False(t, agent.Health.Healthy) + assert.NotEmpty(t, agent.Health.Reason) + }) + + t.Run("Mixed health", func(t *testing.T) { + t.Parallel() + client := coderdtest.New(t, &coderdtest.Options{IncludeProvisionerDaemon: true}) + user := coderdtest.CreateFirstUser(t, client) + version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ + Parse: echo.ParseComplete, + ProvisionApply: []*proto.Provision_Response{{ + Type: &proto.Provision_Response_Complete{ + Complete: &proto.Provision_Complete{ + Resources: []*proto.Resource{{ + Name: "some", + Type: "example", + Agents: []*proto.Agent{{ + Id: uuid.NewString(), + Name: "a1", + Auth: &proto.Agent_Token{}, + }, { + Id: uuid.NewString(), + Name: "a2", + Auth: &proto.Agent_Token{}, + ConnectionTimeoutSeconds: 1, + }}, + }}, + }, + }, + }}, + }) + coderdtest.AwaitTemplateVersionJob(t, client, version.ID) + template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID) + workspace := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID) + coderdtest.AwaitWorkspaceBuildJob(t, client, workspace.LatestBuild.ID) + + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) + defer cancel() + + var err error + testutil.Eventually(ctx, t, func(ctx context.Context) bool { + workspace, err = client.Workspace(ctx, workspace.ID) + return assert.NoError(t, err) && !workspace.Health.Healthy + }, testutil.IntervalMedium) + + assert.False(t, workspace.Health.Healthy) + assert.Len(t, workspace.Health.FailingAgents, 1) + + agent1 := workspace.LatestBuild.Resources[0].Agents[0] + agent2 := workspace.LatestBuild.Resources[0].Agents[1] + + assert.Equal(t, []uuid.UUID{agent2.ID}, workspace.Health.FailingAgents) + assert.True(t, agent1.Health.Healthy) + assert.Empty(t, agent1.Health.Reason) + assert.False(t, agent2.Health.Healthy) + assert.NotEmpty(t, agent2.Health.Reason) + }) + }) } func TestAdminViewAllWorkspaces(t *testing.T) { diff --git a/codersdk/workspaceagents.go b/codersdk/workspaceagents.go index 1b6c4d76d3a2b..208c4511d261a 100644 --- a/codersdk/workspaceagents.go +++ b/codersdk/workspaceagents.go @@ -164,10 +164,16 @@ type WorkspaceAgent struct { ConnectionTimeoutSeconds int32 `json:"connection_timeout_seconds"` TroubleshootingURL string `json:"troubleshooting_url"` // Deprecated: Use StartupScriptBehavior instead. - LoginBeforeReady bool `json:"login_before_ready"` - ShutdownScript string `json:"shutdown_script,omitempty"` - ShutdownScriptTimeoutSeconds int32 `json:"shutdown_script_timeout_seconds"` - Subsystem AgentSubsystem `json:"subsystem"` + LoginBeforeReady bool `json:"login_before_ready"` + ShutdownScript string `json:"shutdown_script,omitempty"` + ShutdownScriptTimeoutSeconds int32 `json:"shutdown_script_timeout_seconds"` + Subsystem AgentSubsystem `json:"subsystem"` + Health WorkspaceAgentHealth `json:"health"` // Health reports the health of the agent. +} + +type WorkspaceAgentHealth struct { + Healthy bool `json:"healthy" example:"false"` // Healthy is true if the agent is healthy. + Reason string `json:"reason,omitempty" example:"agent has lost connection"` // Reason is a human-readable explanation of the agent's health. It is empty if Healthy is true. } type DERPRegion struct { diff --git a/codersdk/workspaces.go b/codersdk/workspaces.go index c076e9e86b6a2..a2ef823fcb87e 100644 --- a/codersdk/workspaces.go +++ b/codersdk/workspaces.go @@ -43,6 +43,14 @@ type Workspace struct { // unlocked by an admin. It is subject to deletion if it breaches // the duration of the locked_ttl field on its template. LockedAt *time.Time `json:"locked_at" format:"date-time"` + // Health shows the health of the workspace and information about + // what is causing an unhealthy status. + Health WorkspaceHealth `json:"health"` +} + +type WorkspaceHealth struct { + Healthy bool `json:"healthy" example:"false"` // Healthy is true if the workspace is healthy. + FailingAgents []uuid.UUID `json:"failing_agents" format:"uuid"` // FailingAgents lists the IDs of the agents that are failing, if any. } type WorkspacesRequest struct { diff --git a/docs/api/agents.md b/docs/api/agents.md index b8c73c8ceae95..69ff2fbe72318 100644 --- a/docs/api/agents.md +++ b/docs/api/agents.md @@ -450,6 +450,10 @@ curl -X GET http://coder-server:8080/api/v2/workspaceagents/{workspaceagent} \ }, "expanded_directory": "string", "first_connected_at": "2019-08-24T14:15:22Z", + "health": { + "healthy": false, + "reason": "agent has lost connection" + }, "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "instance_id": "string", "last_connected_at": "2019-08-24T14:15:22Z", diff --git a/docs/api/builds.md b/docs/api/builds.md index e3b3aa2437327..665b8778b6865 100644 --- a/docs/api/builds.md +++ b/docs/api/builds.md @@ -88,6 +88,10 @@ curl -X GET http://coder-server:8080/api/v2/users/{user}/workspace/{workspacenam }, "expanded_directory": "string", "first_connected_at": "2019-08-24T14:15:22Z", + "health": { + "healthy": false, + "reason": "agent has lost connection" + }, "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "instance_id": "string", "last_connected_at": "2019-08-24T14:15:22Z", @@ -246,6 +250,10 @@ curl -X GET http://coder-server:8080/api/v2/workspacebuilds/{workspacebuild} \ }, "expanded_directory": "string", "first_connected_at": "2019-08-24T14:15:22Z", + "health": { + "healthy": false, + "reason": "agent has lost connection" + }, "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "instance_id": "string", "last_connected_at": "2019-08-24T14:15:22Z", @@ -543,6 +551,10 @@ curl -X GET http://coder-server:8080/api/v2/workspacebuilds/{workspacebuild}/res }, "expanded_directory": "string", "first_connected_at": "2019-08-24T14:15:22Z", + "health": { + "healthy": false, + "reason": "agent has lost connection" + }, "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "instance_id": "string", "last_connected_at": "2019-08-24T14:15:22Z", @@ -635,6 +647,9 @@ Status Code **200** | `»»» [any property]` | string | false | | | | `»» expanded_directory` | string | false | | | | `»» first_connected_at` | string(date-time) | false | | | +| `»» health` | [codersdk.WorkspaceAgentHealth](schemas.md#codersdkworkspaceagenthealth) | false | | Health reports the health of the agent. | +| `»»» healthy` | boolean | false | | Healthy is true if the agent is healthy. | +| `»»» reason` | string | false | | Reason is a human-readable explanation of the agent's health. It is empty if Healthy is true. | | `»» id` | string(uuid) | false | | | | `»» instance_id` | string | false | | | | `»» last_connected_at` | string(date-time) | false | | | @@ -794,6 +809,10 @@ curl -X GET http://coder-server:8080/api/v2/workspacebuilds/{workspacebuild}/sta }, "expanded_directory": "string", "first_connected_at": "2019-08-24T14:15:22Z", + "health": { + "healthy": false, + "reason": "agent has lost connection" + }, "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "instance_id": "string", "last_connected_at": "2019-08-24T14:15:22Z", @@ -957,6 +976,10 @@ curl -X GET http://coder-server:8080/api/v2/workspaces/{workspace}/builds \ }, "expanded_directory": "string", "first_connected_at": "2019-08-24T14:15:22Z", + "health": { + "healthy": false, + "reason": "agent has lost connection" + }, "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "instance_id": "string", "last_connected_at": "2019-08-24T14:15:22Z", @@ -1085,6 +1108,9 @@ Status Code **200** | `»»»» [any property]` | string | false | | | | `»»» expanded_directory` | string | false | | | | `»»» first_connected_at` | string(date-time) | false | | | +| `»»» health` | [codersdk.WorkspaceAgentHealth](schemas.md#codersdkworkspaceagenthealth) | false | | Health reports the health of the agent. | +| `»»»» healthy` | boolean | false | | Healthy is true if the agent is healthy. | +| `»»»» reason` | string | false | | Reason is a human-readable explanation of the agent's health. It is empty if Healthy is true. | | `»»» id` | string(uuid) | false | | | | `»»» instance_id` | string | false | | | | `»»» last_connected_at` | string(date-time) | false | | | @@ -1298,6 +1324,10 @@ curl -X POST http://coder-server:8080/api/v2/workspaces/{workspace}/builds \ }, "expanded_directory": "string", "first_connected_at": "2019-08-24T14:15:22Z", + "health": { + "healthy": false, + "reason": "agent has lost connection" + }, "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "instance_id": "string", "last_connected_at": "2019-08-24T14:15:22Z", diff --git a/docs/api/schemas.md b/docs/api/schemas.md index b51286f9a6c04..727e943a8df58 100644 --- a/docs/api/schemas.md +++ b/docs/api/schemas.md @@ -4683,6 +4683,10 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in "autostart_schedule": "string", "created_at": "2019-08-24T14:15:22Z", "deleting_at": "2019-08-24T14:15:22Z", + "health": { + "failing_agents": ["497f6eca-6276-4993-bfeb-53cbbbba6f08"], + "healthy": false + }, "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "last_used_at": "2019-08-24T14:15:22Z", "latest_build": { @@ -4747,6 +4751,10 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in }, "expanded_directory": "string", "first_connected_at": "2019-08-24T14:15:22Z", + "health": { + "healthy": false, + "reason": "agent has lost connection" + }, "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "instance_id": "string", "last_connected_at": "2019-08-24T14:15:22Z", @@ -4827,27 +4835,28 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in ### Properties -| Name | Type | Required | Restrictions | Description | -| ------------------------------------------- | -------------------------------------------------- | -------- | ------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `autostart_schedule` | string | false | | | -| `created_at` | string | false | | | -| `deleting_at` | string | false | | Deleting at indicates the time of the upcoming workspace deletion, if applicable; otherwise it is nil. Workspaces may have impending deletions if Template.InactivityTTL feature is turned on and the workspace is inactive. | -| `id` | string | false | | | -| `last_used_at` | string | false | | | -| `latest_build` | [codersdk.WorkspaceBuild](#codersdkworkspacebuild) | false | | | -| `locked_at` | string | false | | Locked at being non-nil indicates a workspace that has been locked. A locked workspace is no longer accessible by a user and must be unlocked by an admin. It is subject to deletion if it breaches the duration of the locked_ttl field on its template. | -| `name` | string | false | | | -| `organization_id` | string | false | | | -| `outdated` | boolean | false | | | -| `owner_id` | string | false | | | -| `owner_name` | string | false | | | -| `template_allow_user_cancel_workspace_jobs` | boolean | false | | | -| `template_display_name` | string | false | | | -| `template_icon` | string | false | | | -| `template_id` | string | false | | | -| `template_name` | string | false | | | -| `ttl_ms` | integer | false | | | -| `updated_at` | string | false | | | +| Name | Type | Required | Restrictions | Description | +| ------------------------------------------- | ---------------------------------------------------- | -------- | ------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `autostart_schedule` | string | false | | | +| `created_at` | string | false | | | +| `deleting_at` | string | false | | Deleting at indicates the time of the upcoming workspace deletion, if applicable; otherwise it is nil. Workspaces may have impending deletions if Template.InactivityTTL feature is turned on and the workspace is inactive. | +| `health` | [codersdk.WorkspaceHealth](#codersdkworkspacehealth) | false | | Health shows the health of the workspace and information about what is causing an unhealthy status. | +| `id` | string | false | | | +| `last_used_at` | string | false | | | +| `latest_build` | [codersdk.WorkspaceBuild](#codersdkworkspacebuild) | false | | | +| `locked_at` | string | false | | Locked at being non-nil indicates a workspace that has been locked. A locked workspace is no longer accessible by a user and must be unlocked by an admin. It is subject to deletion if it breaches the duration of the locked_ttl field on its template. | +| `name` | string | false | | | +| `organization_id` | string | false | | | +| `outdated` | boolean | false | | | +| `owner_id` | string | false | | | +| `owner_name` | string | false | | | +| `template_allow_user_cancel_workspace_jobs` | boolean | false | | | +| `template_display_name` | string | false | | | +| `template_icon` | string | false | | | +| `template_id` | string | false | | | +| `template_name` | string | false | | | +| `ttl_ms` | integer | false | | | +| `updated_at` | string | false | | | ## codersdk.WorkspaceAgent @@ -4883,6 +4892,10 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in }, "expanded_directory": "string", "first_connected_at": "2019-08-24T14:15:22Z", + "health": { + "healthy": false, + "reason": "agent has lost connection" + }, "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "instance_id": "string", "last_connected_at": "2019-08-24T14:15:22Z", @@ -4932,6 +4945,7 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in | » `[any property]` | string | false | | | | `expanded_directory` | string | false | | | | `first_connected_at` | string | false | | | +| `health` | [codersdk.WorkspaceAgentHealth](#codersdkworkspaceagenthealth) | false | | Health reports the health of the agent. | | `id` | string | false | | | | `instance_id` | string | false | | | | `last_connected_at` | string | false | | | @@ -5023,6 +5037,22 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in | `derp_map` | [tailcfg.DERPMap](#tailcfgderpmap) | false | | | | `disable_direct_connections` | boolean | false | | | +## codersdk.WorkspaceAgentHealth + +```json +{ + "healthy": false, + "reason": "agent has lost connection" +} +``` + +### Properties + +| Name | Type | Required | Restrictions | Description | +| --------- | ------- | -------- | ------------ | --------------------------------------------------------------------------------------------- | +| `healthy` | boolean | false | | Healthy is true if the agent is healthy. | +| `reason` | string | false | | Reason is a human-readable explanation of the agent's health. It is empty if Healthy is true. | + ## codersdk.WorkspaceAgentLifecycle ```json @@ -5301,6 +5331,10 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in }, "expanded_directory": "string", "first_connected_at": "2019-08-24T14:15:22Z", + "health": { + "healthy": false, + "reason": "agent has lost connection" + }, "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "instance_id": "string", "last_connected_at": "2019-08-24T14:15:22Z", @@ -5474,6 +5508,22 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in | `stopped` | integer | false | | | | `tx_bytes` | integer | false | | | +## codersdk.WorkspaceHealth + +```json +{ + "failing_agents": ["497f6eca-6276-4993-bfeb-53cbbbba6f08"], + "healthy": false +} +``` + +### Properties + +| Name | Type | Required | Restrictions | Description | +| ---------------- | --------------- | -------- | ------------ | -------------------------------------------------------------------- | +| `failing_agents` | array of string | false | | Failing agents lists the IDs of the agents that are failing, if any. | +| `healthy` | boolean | false | | Healthy is true if the workspace is healthy. | + ## codersdk.WorkspaceProxy ```json @@ -5588,6 +5638,10 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in }, "expanded_directory": "string", "first_connected_at": "2019-08-24T14:15:22Z", + "health": { + "healthy": false, + "reason": "agent has lost connection" + }, "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "instance_id": "string", "last_connected_at": "2019-08-24T14:15:22Z", @@ -5732,6 +5786,10 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in "autostart_schedule": "string", "created_at": "2019-08-24T14:15:22Z", "deleting_at": "2019-08-24T14:15:22Z", + "health": { + "failing_agents": ["497f6eca-6276-4993-bfeb-53cbbbba6f08"], + "healthy": false + }, "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "last_used_at": "2019-08-24T14:15:22Z", "latest_build": { @@ -5792,6 +5850,10 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in }, "expanded_directory": "string", "first_connected_at": "2019-08-24T14:15:22Z", + "health": { + "healthy": false, + "reason": "agent has lost connection" + }, "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "instance_id": "string", "last_connected_at": "2019-08-24T14:15:22Z", diff --git a/docs/api/templates.md b/docs/api/templates.md index dc22333624801..ca55675a114b6 100644 --- a/docs/api/templates.md +++ b/docs/api/templates.md @@ -1659,6 +1659,10 @@ curl -X GET http://coder-server:8080/api/v2/templateversions/{templateversion}/d }, "expanded_directory": "string", "first_connected_at": "2019-08-24T14:15:22Z", + "health": { + "healthy": false, + "reason": "agent has lost connection" + }, "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "instance_id": "string", "last_connected_at": "2019-08-24T14:15:22Z", @@ -1751,6 +1755,9 @@ Status Code **200** | `»»» [any property]` | string | false | | | | `»» expanded_directory` | string | false | | | | `»» first_connected_at` | string(date-time) | false | | | +| `»» health` | [codersdk.WorkspaceAgentHealth](schemas.md#codersdkworkspaceagenthealth) | false | | Health reports the health of the agent. | +| `»»» healthy` | boolean | false | | Healthy is true if the agent is healthy. | +| `»»» reason` | string | false | | Reason is a human-readable explanation of the agent's health. It is empty if Healthy is true. | | `»» id` | string(uuid) | false | | | | `»» instance_id` | string | false | | | | `»» last_connected_at` | string(date-time) | false | | | @@ -2044,6 +2051,10 @@ curl -X GET http://coder-server:8080/api/v2/templateversions/{templateversion}/r }, "expanded_directory": "string", "first_connected_at": "2019-08-24T14:15:22Z", + "health": { + "healthy": false, + "reason": "agent has lost connection" + }, "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "instance_id": "string", "last_connected_at": "2019-08-24T14:15:22Z", @@ -2136,6 +2147,9 @@ Status Code **200** | `»»» [any property]` | string | false | | | | `»» expanded_directory` | string | false | | | | `»» first_connected_at` | string(date-time) | false | | | +| `»» health` | [codersdk.WorkspaceAgentHealth](schemas.md#codersdkworkspaceagenthealth) | false | | Health reports the health of the agent. | +| `»»» healthy` | boolean | false | | Healthy is true if the agent is healthy. | +| `»»» reason` | string | false | | Reason is a human-readable explanation of the agent's health. It is empty if Healthy is true. | | `»» id` | string(uuid) | false | | | | `»» instance_id` | string | false | | | | `»» last_connected_at` | string(date-time) | false | | | diff --git a/docs/api/workspaces.md b/docs/api/workspaces.md index 4a9b6b79be138..65a399b0e594b 100644 --- a/docs/api/workspaces.md +++ b/docs/api/workspaces.md @@ -48,6 +48,10 @@ curl -X POST http://coder-server:8080/api/v2/organizations/{organization}/member "autostart_schedule": "string", "created_at": "2019-08-24T14:15:22Z", "deleting_at": "2019-08-24T14:15:22Z", + "health": { + "failing_agents": ["497f6eca-6276-4993-bfeb-53cbbbba6f08"], + "healthy": false + }, "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "last_used_at": "2019-08-24T14:15:22Z", "latest_build": { @@ -112,6 +116,10 @@ curl -X POST http://coder-server:8080/api/v2/organizations/{organization}/member }, "expanded_directory": "string", "first_connected_at": "2019-08-24T14:15:22Z", + "health": { + "healthy": false, + "reason": "agent has lost connection" + }, "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "instance_id": "string", "last_connected_at": "2019-08-24T14:15:22Z", @@ -228,6 +236,10 @@ curl -X GET http://coder-server:8080/api/v2/users/{user}/workspace/{workspacenam "autostart_schedule": "string", "created_at": "2019-08-24T14:15:22Z", "deleting_at": "2019-08-24T14:15:22Z", + "health": { + "failing_agents": ["497f6eca-6276-4993-bfeb-53cbbbba6f08"], + "healthy": false + }, "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "last_used_at": "2019-08-24T14:15:22Z", "latest_build": { @@ -292,6 +304,10 @@ curl -X GET http://coder-server:8080/api/v2/users/{user}/workspace/{workspacenam }, "expanded_directory": "string", "first_connected_at": "2019-08-24T14:15:22Z", + "health": { + "healthy": false, + "reason": "agent has lost connection" + }, "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "instance_id": "string", "last_connected_at": "2019-08-24T14:15:22Z", @@ -432,6 +448,10 @@ curl -X GET http://coder-server:8080/api/v2/workspaces \ "autostart_schedule": "string", "created_at": "2019-08-24T14:15:22Z", "deleting_at": "2019-08-24T14:15:22Z", + "health": { + "failing_agents": ["497f6eca-6276-4993-bfeb-53cbbbba6f08"], + "healthy": false + }, "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "last_used_at": "2019-08-24T14:15:22Z", "latest_build": { @@ -492,6 +512,10 @@ curl -X GET http://coder-server:8080/api/v2/workspaces \ }, "expanded_directory": "string", "first_connected_at": "2019-08-24T14:15:22Z", + "health": { + "healthy": false, + "reason": "agent has lost connection" + }, "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "instance_id": "string", "last_connected_at": "2019-08-24T14:15:22Z", @@ -609,6 +633,10 @@ curl -X GET http://coder-server:8080/api/v2/workspaces/{workspace} \ "autostart_schedule": "string", "created_at": "2019-08-24T14:15:22Z", "deleting_at": "2019-08-24T14:15:22Z", + "health": { + "failing_agents": ["497f6eca-6276-4993-bfeb-53cbbbba6f08"], + "healthy": false + }, "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "last_used_at": "2019-08-24T14:15:22Z", "latest_build": { @@ -673,6 +701,10 @@ curl -X GET http://coder-server:8080/api/v2/workspaces/{workspace} \ }, "expanded_directory": "string", "first_connected_at": "2019-08-24T14:15:22Z", + "health": { + "healthy": false, + "reason": "agent has lost connection" + }, "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "instance_id": "string", "last_connected_at": "2019-08-24T14:15:22Z", diff --git a/site/src/api/typesGenerated.ts b/site/src/api/typesGenerated.ts index 6e0a012a2af4e..57935076d6ae8 100644 --- a/site/src/api/typesGenerated.ts +++ b/site/src/api/typesGenerated.ts @@ -1145,6 +1145,7 @@ export interface Workspace { readonly last_used_at: string readonly deleting_at?: string readonly locked_at?: string + readonly health: WorkspaceHealth } // From codersdk/workspaceagents.go @@ -1181,6 +1182,13 @@ export interface WorkspaceAgent { readonly shutdown_script?: string readonly shutdown_script_timeout_seconds: number readonly subsystem: AgentSubsystem + readonly health: WorkspaceAgentHealth +} + +// From codersdk/workspaceagents.go +export interface WorkspaceAgentHealth { + readonly healthy: boolean + readonly reason?: string } // From codersdk/workspaceagentconn.go @@ -1300,6 +1308,12 @@ export interface WorkspaceFilter { readonly q?: string } +// From codersdk/workspaces.go +export interface WorkspaceHealth { + readonly healthy: boolean + readonly failing_agents: string[] +} + // From codersdk/workspaces.go export interface WorkspaceOptions { readonly include_deleted?: boolean diff --git a/site/src/testHelpers/entities.ts b/site/src/testHelpers/entities.ts index cf6cd175d21ad..ef8ba1668ba99 100644 --- a/site/src/testHelpers/entities.ts +++ b/site/src/testHelpers/entities.ts @@ -542,6 +542,9 @@ export const MockWorkspaceAgent: TypesGen.WorkspaceAgent = { startup_script_timeout_seconds: 120, shutdown_script_timeout_seconds: 120, subsystem: "envbox", + health: { + healthy: true, + }, } export const MockWorkspaceAgentDisconnected: TypesGen.WorkspaceAgent = { @@ -552,6 +555,10 @@ export const MockWorkspaceAgentDisconnected: TypesGen.WorkspaceAgent = { version: "", latency: {}, lifecycle_state: "ready", + health: { + healthy: false, + reason: "agent is not connected", + }, } export const MockWorkspaceAgentOutdated: TypesGen.WorkspaceAgent = { @@ -596,6 +603,10 @@ export const MockWorkspaceAgentTimeout: TypesGen.WorkspaceAgent = { version: "", latency: {}, lifecycle_state: "created", + health: { + healthy: false, + reason: "agent is taking too long to connect", + }, } export const MockWorkspaceAgentStarting: TypesGen.WorkspaceAgent = { @@ -624,6 +635,10 @@ export const MockWorkspaceAgentStartError: TypesGen.WorkspaceAgent = { id: "test-workspace-agent-start-error", name: "a-workspace-agent-errored-while-running-startup-script", lifecycle_state: "start_error", + health: { + healthy: false, + reason: "agent startup script failed", + }, } export const MockWorkspaceAgentShuttingDown: TypesGen.WorkspaceAgent = { @@ -631,6 +646,10 @@ export const MockWorkspaceAgentShuttingDown: TypesGen.WorkspaceAgent = { id: "test-workspace-agent-shutting-down", name: "a-shutting-down-workspace-agent", lifecycle_state: "shutting_down", + health: { + healthy: false, + reason: "agent is shutting down", + }, } export const MockWorkspaceAgentShutdownTimeout: TypesGen.WorkspaceAgent = { @@ -638,6 +657,10 @@ export const MockWorkspaceAgentShutdownTimeout: TypesGen.WorkspaceAgent = { id: "test-workspace-agent-shutdown-timeout", name: "a-workspace-agent-timed-out-while-running-shutdownup-script", lifecycle_state: "shutdown_timeout", + health: { + healthy: false, + reason: "agent is shutting down", + }, } export const MockWorkspaceAgentShutdownError: TypesGen.WorkspaceAgent = { @@ -645,6 +668,10 @@ export const MockWorkspaceAgentShutdownError: TypesGen.WorkspaceAgent = { id: "test-workspace-agent-shutdown-error", name: "a-workspace-agent-errored-while-running-shutdownup-script", lifecycle_state: "shutdown_error", + health: { + healthy: false, + reason: "agent is shutting down", + }, } export const MockWorkspaceAgentOff: TypesGen.WorkspaceAgent = { @@ -652,6 +679,10 @@ export const MockWorkspaceAgentOff: TypesGen.WorkspaceAgent = { id: "test-workspace-agent-off", name: "a-workspace-agent-is-shut-down", lifecycle_state: "off", + health: { + healthy: false, + reason: "agent is shutting down", + }, } export const MockWorkspaceResource: TypesGen.WorkspaceResource = { @@ -803,6 +834,10 @@ export const MockWorkspace: TypesGen.Workspace = { ttl_ms: 2 * 60 * 60 * 1000, latest_build: MockWorkspaceBuild, last_used_at: "2022-05-16T15:29:10.302441433Z", + health: { + healthy: true, + failing_agents: [], + }, } export const MockStoppedWorkspace: TypesGen.Workspace = {