Skip to content

Commit 1175119

Browse files
authored
Merge branch 'main' into f0ssel/port-sharing-1
2 parents c07e4a1 + e1e352d commit 1175119

File tree

113 files changed

+1638
-669
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

113 files changed

+1638
-669
lines changed

.github/pr-deployments/template/main.tf

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -88,10 +88,9 @@ provider "kubernetes" {
8888
data "coder_workspace" "me" {}
8989

9090
resource "coder_agent" "main" {
91-
os = "linux"
92-
arch = "amd64"
93-
startup_script_timeout = 180
94-
startup_script = <<-EOT
91+
os = "linux"
92+
arch = "amd64"
93+
startup_script = <<-EOT
9594
set -e
9695
9796
# install and start code-server

agent/agenttest/client.go

Lines changed: 15 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -197,9 +197,10 @@ type FakeAgentAPI struct {
197197
t testing.TB
198198
logger slog.Logger
199199

200-
manifest *agentproto.Manifest
201-
startupCh chan *agentproto.Startup
202-
statsCh chan *agentproto.Stats
200+
manifest *agentproto.Manifest
201+
startupCh chan *agentproto.Startup
202+
statsCh chan *agentproto.Stats
203+
appHealthCh chan *agentproto.BatchUpdateAppHealthRequest
203204

204205
getServiceBannerFunc func() (codersdk.ServiceBannerConfig, error)
205206
}
@@ -244,9 +245,14 @@ func (*FakeAgentAPI) UpdateLifecycle(context.Context, *agentproto.UpdateLifecycl
244245

245246
func (f *FakeAgentAPI) BatchUpdateAppHealths(ctx context.Context, req *agentproto.BatchUpdateAppHealthRequest) (*agentproto.BatchUpdateAppHealthResponse, error) {
246247
f.logger.Debug(ctx, "batch update app health", slog.F("req", req))
248+
f.appHealthCh <- req
247249
return &agentproto.BatchUpdateAppHealthResponse{}, nil
248250
}
249251

252+
func (f *FakeAgentAPI) AppHealthCh() <-chan *agentproto.BatchUpdateAppHealthRequest {
253+
return f.appHealthCh
254+
}
255+
250256
func (f *FakeAgentAPI) UpdateStartup(_ context.Context, req *agentproto.UpdateStartupRequest) (*agentproto.Startup, error) {
251257
f.startupCh <- req.GetStartup()
252258
return req.GetStartup(), nil
@@ -264,10 +270,11 @@ func (*FakeAgentAPI) BatchCreateLogs(context.Context, *agentproto.BatchCreateLog
264270

265271
func NewFakeAgentAPI(t testing.TB, logger slog.Logger, manifest *agentproto.Manifest, statsCh chan *agentproto.Stats) *FakeAgentAPI {
266272
return &FakeAgentAPI{
267-
t: t,
268-
logger: logger.Named("FakeAgentAPI"),
269-
manifest: manifest,
270-
statsCh: statsCh,
271-
startupCh: make(chan *agentproto.Startup, 100),
273+
t: t,
274+
logger: logger.Named("FakeAgentAPI"),
275+
manifest: manifest,
276+
statsCh: statsCh,
277+
startupCh: make(chan *agentproto.Startup, 100),
278+
appHealthCh: make(chan *agentproto.BatchUpdateAppHealthRequest, 100),
272279
}
273280
}

agent/apphealth.go

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,12 @@ type WorkspaceAppHealthReporter func(ctx context.Context)
2626

2727
// NewWorkspaceAppHealthReporter creates a WorkspaceAppHealthReporter that reports app health to coderd.
2828
func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.WorkspaceApp, postWorkspaceAgentAppHealth PostWorkspaceAgentAppHealth) WorkspaceAppHealthReporter {
29+
logger = logger.Named("apphealth")
30+
2931
runHealthcheckLoop := func(ctx context.Context) error {
32+
ctx, cancel := context.WithCancel(ctx)
33+
defer cancel()
34+
3035
// no need to run this loop if no apps for this workspace.
3136
if len(apps) == 0 {
3237
return nil
@@ -87,6 +92,7 @@ func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.Workspace
8792
return nil
8893
}()
8994
if err != nil {
95+
nowUnhealthy := false
9096
mu.Lock()
9197
if failures[app.ID] < int(app.Healthcheck.Threshold) {
9298
// increment the failure count and keep status the same.
@@ -96,14 +102,21 @@ func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.Workspace
96102
// set to unhealthy if we hit the failure threshold.
97103
// we stop incrementing at the threshold to prevent the failure value from increasing forever.
98104
health[app.ID] = codersdk.WorkspaceAppHealthUnhealthy
105+
nowUnhealthy = true
99106
}
100107
mu.Unlock()
108+
logger.Debug(ctx, "error checking app health",
109+
slog.F("id", app.ID.String()),
110+
slog.F("slug", app.Slug),
111+
slog.F("now_unhealthy", nowUnhealthy), slog.Error(err),
112+
)
101113
} else {
102114
mu.Lock()
103115
// we only need one successful health check to be considered healthy.
104116
health[app.ID] = codersdk.WorkspaceAppHealthHealthy
105117
failures[app.ID] = 0
106118
mu.Unlock()
119+
logger.Debug(ctx, "workspace app healthy", slog.F("id", app.ID.String()), slog.F("slug", app.Slug))
107120
}
108121

109122
t.Reset(time.Duration(app.Healthcheck.Interval) * time.Second)
@@ -137,7 +150,9 @@ func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.Workspace
137150
Healths: lastHealth,
138151
})
139152
if err != nil {
140-
logger.Error(ctx, "failed to report workspace app stat", slog.Error(err))
153+
logger.Error(ctx, "failed to report workspace app health", slog.Error(err))
154+
} else {
155+
logger.Debug(ctx, "sent workspace app health", slog.F("health", lastHealth))
141156
}
142157
}
143158
}

agent/apphealth_test.go

Lines changed: 56 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -4,16 +4,21 @@ import (
44
"context"
55
"net/http"
66
"net/http/httptest"
7+
"strings"
78
"sync"
89
"sync/atomic"
910
"testing"
1011
"time"
1112

13+
"github.com/google/uuid"
14+
"github.com/stretchr/testify/assert"
1215
"github.com/stretchr/testify/require"
1316

1417
"cdr.dev/slog"
1518
"cdr.dev/slog/sloggers/slogtest"
1619
"github.com/coder/coder/v2/agent"
20+
"github.com/coder/coder/v2/agent/agenttest"
21+
"github.com/coder/coder/v2/agent/proto"
1722
"github.com/coder/coder/v2/coderd/httpapi"
1823
"github.com/coder/coder/v2/codersdk"
1924
"github.com/coder/coder/v2/codersdk/agentsdk"
@@ -40,12 +45,23 @@ func TestAppHealth_Healthy(t *testing.T) {
4045
},
4146
Health: codersdk.WorkspaceAppHealthInitializing,
4247
},
48+
{
49+
Slug: "app3",
50+
Healthcheck: codersdk.Healthcheck{
51+
Interval: 2,
52+
Threshold: 1,
53+
},
54+
Health: codersdk.WorkspaceAppHealthInitializing,
55+
},
4356
}
4457
handlers := []http.Handler{
4558
nil,
4659
http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
4760
httpapi.Write(r.Context(), w, http.StatusOK, nil)
4861
}),
62+
http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
63+
httpapi.Write(r.Context(), w, http.StatusOK, nil)
64+
}),
4965
}
5066
getApps, closeFn := setupAppReporter(ctx, t, apps, handlers)
5167
defer closeFn()
@@ -58,7 +74,7 @@ func TestAppHealth_Healthy(t *testing.T) {
5874
return false
5975
}
6076

61-
return apps[1].Health == codersdk.WorkspaceAppHealthHealthy
77+
return apps[1].Health == codersdk.WorkspaceAppHealthHealthy && apps[2].Health == codersdk.WorkspaceAppHealthHealthy
6278
}, testutil.WaitLong, testutil.IntervalSlow)
6379
}
6480

@@ -163,6 +179,12 @@ func TestAppHealth_NotSpamming(t *testing.T) {
163179

164180
func setupAppReporter(ctx context.Context, t *testing.T, apps []codersdk.WorkspaceApp, handlers []http.Handler) (agent.WorkspaceAgentApps, func()) {
165181
closers := []func(){}
182+
for i, app := range apps {
183+
if app.ID == uuid.Nil {
184+
app.ID = uuid.New()
185+
apps[i] = app
186+
}
187+
}
166188
for i, handler := range handlers {
167189
if handler == nil {
168190
continue
@@ -181,23 +203,43 @@ func setupAppReporter(ctx context.Context, t *testing.T, apps []codersdk.Workspa
181203
var newApps []codersdk.WorkspaceApp
182204
return append(newApps, apps...), nil
183205
}
184-
postWorkspaceAgentAppHealth := func(_ context.Context, req agentsdk.PostAppHealthsRequest) error {
185-
mu.Lock()
186-
for id, health := range req.Healths {
187-
for i, app := range apps {
188-
if app.ID != id {
189-
continue
206+
207+
// We don't care about manifest or stats in this test since it's not using
208+
// a full agent and these RPCs won't get called.
209+
//
210+
// We use a proper fake agent API so we can test the conversion code and the
211+
// request code as well. Before we were bypassing these by using a custom
212+
// post function.
213+
fakeAAPI := agenttest.NewFakeAgentAPI(t, slogtest.Make(t, nil), nil, nil)
214+
215+
// Process events from the channel and update the health of the apps.
216+
go func() {
217+
appHealthCh := fakeAAPI.AppHealthCh()
218+
for {
219+
select {
220+
case <-ctx.Done():
221+
return
222+
case req := <-appHealthCh:
223+
mu.Lock()
224+
for _, update := range req.Updates {
225+
updateID, err := uuid.FromBytes(update.Id)
226+
assert.NoError(t, err)
227+
updateHealth := codersdk.WorkspaceAppHealth(strings.ToLower(proto.AppHealth_name[int32(update.Health)]))
228+
229+
for i, app := range apps {
230+
if app.ID != updateID {
231+
continue
232+
}
233+
app.Health = updateHealth
234+
apps[i] = app
235+
}
190236
}
191-
app.Health = health
192-
apps[i] = app
237+
mu.Unlock()
193238
}
194239
}
195-
mu.Unlock()
196-
197-
return nil
198-
}
240+
}()
199241

200-
go agent.NewWorkspaceAppHealthReporter(slogtest.Make(t, nil).Leveled(slog.LevelDebug), apps, postWorkspaceAgentAppHealth)(ctx)
242+
go agent.NewWorkspaceAppHealthReporter(slogtest.Make(t, nil).Leveled(slog.LevelDebug), apps, agentsdk.AppHealthPoster(fakeAAPI))(ctx)
201243

202244
return workspaceAgentApps, func() {
203245
for _, closeFn := range closers {

0 commit comments

Comments
 (0)