Skip to content

Commit f825477

Browse files
authored
fix: add timeouts to test telemetry snapshot (#17879)
This PR ensures that waits on channels will time out according to the test context, rather than waiting indefinitely. This should alleviate the panic seen in coder/internal#645 and, if the deadlock recurs, allow the test to be retried automatically in CI.
1 parent 5a3a7fc commit f825477

File tree

2 files changed

+89
-13
lines changed

2 files changed

+89
-13
lines changed

coderd/telemetry/telemetry_test.go

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package telemetry_test
22

33
import (
4+
"context"
45
"database/sql"
56
"encoding/json"
67
"net/http"
@@ -115,7 +116,7 @@ func TestTelemetry(t *testing.T) {
115116
_ = dbgen.WorkspaceAgentMemoryResourceMonitor(t, db, database.WorkspaceAgentMemoryResourceMonitor{})
116117
_ = dbgen.WorkspaceAgentVolumeResourceMonitor(t, db, database.WorkspaceAgentVolumeResourceMonitor{})
117118

118-
_, snapshot := collectSnapshot(t, db, nil)
119+
_, snapshot := collectSnapshot(ctx, t, db, nil)
119120
require.Len(t, snapshot.ProvisionerJobs, 1)
120121
require.Len(t, snapshot.Licenses, 1)
121122
require.Len(t, snapshot.Templates, 1)
@@ -168,17 +169,19 @@ func TestTelemetry(t *testing.T) {
168169
})
169170
t.Run("HashedEmail", func(t *testing.T) {
170171
t.Parallel()
172+
ctx := testutil.Context(t, testutil.WaitMedium)
171173
db := dbmem.New()
172174
_ = dbgen.User(t, db, database.User{
173175
Email: "kyle@coder.com",
174176
})
175-
_, snapshot := collectSnapshot(t, db, nil)
177+
_, snapshot := collectSnapshot(ctx, t, db, nil)
176178
require.Len(t, snapshot.Users, 1)
177179
require.Equal(t, snapshot.Users[0].EmailHashed, "bb44bf07cf9a2db0554bba63a03d822c927deae77df101874496df5a6a3e896d@coder.com")
178180
})
179181
t.Run("HashedModule", func(t *testing.T) {
180182
t.Parallel()
181183
db, _ := dbtestutil.NewDB(t)
184+
ctx := testutil.Context(t, testutil.WaitMedium)
182185
pj := dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{})
183186
_ = dbgen.WorkspaceModule(t, db, database.WorkspaceModule{
184187
JobID: pj.ID,
@@ -190,7 +193,7 @@ func TestTelemetry(t *testing.T) {
190193
Source: "https://internal-url.com/some-module",
191194
Version: "1.0.0",
192195
})
193-
_, snapshot := collectSnapshot(t, db, nil)
196+
_, snapshot := collectSnapshot(ctx, t, db, nil)
194197
require.Len(t, snapshot.WorkspaceModules, 2)
195198
modules := snapshot.WorkspaceModules
196199
sort.Slice(modules, func(i, j int) bool {
@@ -286,11 +289,11 @@ func TestTelemetry(t *testing.T) {
286289
db, _ := dbtestutil.NewDB(t)
287290

288291
// 1. No org sync settings
289-
deployment, _ := collectSnapshot(t, db, nil)
292+
deployment, _ := collectSnapshot(ctx, t, db, nil)
290293
require.False(t, *deployment.IDPOrgSync)
291294

292295
// 2. Org sync settings set in server flags
293-
deployment, _ = collectSnapshot(t, db, func(opts telemetry.Options) telemetry.Options {
296+
deployment, _ = collectSnapshot(ctx, t, db, func(opts telemetry.Options) telemetry.Options {
294297
opts.DeploymentConfig = &codersdk.DeploymentValues{
295298
OIDC: codersdk.OIDCConfig{
296299
OrganizationField: "organizations",
@@ -312,16 +315,17 @@ func TestTelemetry(t *testing.T) {
312315
AssignDefault: true,
313316
})
314317
require.NoError(t, err)
315-
deployment, _ = collectSnapshot(t, db, nil)
318+
deployment, _ = collectSnapshot(ctx, t, db, nil)
316319
require.True(t, *deployment.IDPOrgSync)
317320
})
318321
}
319322

320323
// nolint:paralleltest
321324
func TestTelemetryInstallSource(t *testing.T) {
322325
t.Setenv("CODER_TELEMETRY_INSTALL_SOURCE", "aws_marketplace")
326+
ctx := testutil.Context(t, testutil.WaitMedium)
323327
db := dbmem.New()
324-
deployment, _ := collectSnapshot(t, db, nil)
328+
deployment, _ := collectSnapshot(ctx, t, db, nil)
325329
require.Equal(t, "aws_marketplace", deployment.InstallSource)
326330
}
327331

@@ -436,7 +440,7 @@ func TestRecordTelemetryStatus(t *testing.T) {
436440
}
437441
}
438442

439-
func mockTelemetryServer(t *testing.T) (*url.URL, chan *telemetry.Deployment, chan *telemetry.Snapshot) {
443+
func mockTelemetryServer(ctx context.Context, t *testing.T) (*url.URL, chan *telemetry.Deployment, chan *telemetry.Snapshot) {
440444
t.Helper()
441445
deployment := make(chan *telemetry.Deployment, 64)
442446
snapshot := make(chan *telemetry.Snapshot, 64)
@@ -446,7 +450,11 @@ func mockTelemetryServer(t *testing.T) (*url.URL, chan *telemetry.Deployment, ch
446450
dd := &telemetry.Deployment{}
447451
err := json.NewDecoder(r.Body).Decode(dd)
448452
require.NoError(t, err)
449-
deployment <- dd
453+
ok := testutil.AssertSend(ctx, t, deployment, dd)
454+
if !ok {
455+
w.WriteHeader(http.StatusInternalServerError)
456+
return
457+
}
450458
// Ensure the header is sent only after deployment is sent
451459
w.WriteHeader(http.StatusAccepted)
452460
})
@@ -455,7 +463,11 @@ func mockTelemetryServer(t *testing.T) (*url.URL, chan *telemetry.Deployment, ch
455463
ss := &telemetry.Snapshot{}
456464
err := json.NewDecoder(r.Body).Decode(ss)
457465
require.NoError(t, err)
458-
snapshot <- ss
466+
ok := testutil.AssertSend(ctx, t, snapshot, ss)
467+
if !ok {
468+
w.WriteHeader(http.StatusInternalServerError)
469+
return
470+
}
459471
// Ensure the header is sent only after snapshot is sent
460472
w.WriteHeader(http.StatusAccepted)
461473
})
@@ -467,10 +479,15 @@ func mockTelemetryServer(t *testing.T) (*url.URL, chan *telemetry.Deployment, ch
467479
return serverURL, deployment, snapshot
468480
}
469481

470-
func collectSnapshot(t *testing.T, db database.Store, addOptionsFn func(opts telemetry.Options) telemetry.Options) (*telemetry.Deployment, *telemetry.Snapshot) {
482+
func collectSnapshot(
483+
ctx context.Context,
484+
t *testing.T,
485+
db database.Store,
486+
addOptionsFn func(opts telemetry.Options) telemetry.Options,
487+
) (*telemetry.Deployment, *telemetry.Snapshot) {
471488
t.Helper()
472489

473-
serverURL, deployment, snapshot := mockTelemetryServer(t)
490+
serverURL, deployment, snapshot := mockTelemetryServer(ctx, t)
474491

475492
options := telemetry.Options{
476493
Database: db,
@@ -485,5 +502,6 @@ func collectSnapshot(t *testing.T, db database.Store, addOptionsFn func(opts tel
485502
reporter, err := telemetry.New(options)
486503
require.NoError(t, err)
487504
t.Cleanup(reporter.Close)
488-
return <-deployment, <-snapshot
505+
506+
return testutil.RequireReceive(ctx, t, deployment), testutil.RequireReceive(ctx, t, snapshot)
489507
}

testutil/chan.go

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,61 @@ func RequireSend[A any](ctx context.Context, t testing.TB, c chan<- A, a A) {
5555
// OK!
5656
}
5757
}
58+
59+
// SoftTryReceive will attempt to receive a value from the chan and return it. If
60+
// the context expires before a value can be received, it will mark the test as
61+
// failed but continue execution. If the channel is closed, the zero value of the
62+
// channel type will be returned.
63+
// The second return value indicates whether the receive was successful. In
64+
// particular, if the channel is closed, the second return value will be true.
65+
//
66+
// Safety: can be called from any goroutine.
67+
func SoftTryReceive[A any](ctx context.Context, t testing.TB, c <-chan A) (A, bool) {
68+
t.Helper()
69+
select {
70+
case <-ctx.Done():
71+
t.Error("timeout")
72+
var a A
73+
return a, false
74+
case a := <-c:
75+
return a, true
76+
}
77+
}
78+
79+
// AssertReceive will receive a value from the chan and return it. If the
80+
// context expires or the channel is closed before a value can be received,
81+
// it will mark the test as failed but continue execution.
82+
// The second return value indicates whether the receive was successful.
83+
//
84+
// Safety: can be called from any goroutine.
85+
func AssertReceive[A any](ctx context.Context, t testing.TB, c <-chan A) (A, bool) {
86+
t.Helper()
87+
select {
88+
case <-ctx.Done():
89+
t.Error("timeout")
90+
var a A
91+
return a, false
92+
case a, ok := <-c:
93+
if !ok {
94+
t.Error("channel closed")
95+
}
96+
return a, ok
97+
}
98+
}
99+
100+
// AssertSend will send the given value over the chan and then return. If
101+
// the context expires before the send succeeds, it will mark the test as failed
102+
// but continue execution.
103+
// The second return value indicates whether the send was successful.
104+
//
105+
// Safety: can be called from any goroutine.
106+
func AssertSend[A any](ctx context.Context, t testing.TB, c chan<- A, a A) bool {
107+
t.Helper()
108+
select {
109+
case <-ctx.Done():
110+
t.Error("timeout")
111+
return false
112+
case c <- a:
113+
return true
114+
}
115+
}

0 commit comments

Comments
 (0)