Skip to content

Commit 15c1560

Browse files
committed
feat: add telemetry to user-scoped tailnet API call
1 parent 674f60f commit 15c1560

File tree

3 files changed

+128
-7
lines changed

3 files changed

+128
-7
lines changed

coderd/coderdtest/coderdtest.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ import (
5252
"cdr.dev/slog"
5353
"cdr.dev/slog/sloggers/sloghuman"
5454
"cdr.dev/slog/sloggers/slogtest"
55+
"github.com/coder/quartz"
56+
5557
"github.com/coder/coder/v2/coderd"
5658
"github.com/coder/coder/v2/coderd/audit"
5759
"github.com/coder/coder/v2/coderd/autobuild"
@@ -91,7 +93,6 @@ import (
9193
sdkproto "github.com/coder/coder/v2/provisionersdk/proto"
9294
"github.com/coder/coder/v2/tailnet"
9395
"github.com/coder/coder/v2/testutil"
94-
"github.com/coder/quartz"
9596
)
9697

9798
type Options struct {
@@ -170,6 +171,7 @@ type Options struct {
170171
APIKeyEncryptionCache cryptokeys.EncryptionKeycache
171172
OIDCConvertKeyCache cryptokeys.SigningKeycache
172173
Clock quartz.Clock
174+
TelemetryReporter telemetry.Reporter
173175
}
174176

175177
// New constructs a codersdk client connected to an in-memory API instance.
@@ -358,6 +360,10 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can
358360
hangDetector.Start()
359361
t.Cleanup(hangDetector.Close)
360362

363+
if options.TelemetryReporter == nil {
364+
options.TelemetryReporter = telemetry.NewNoop()
365+
}
366+
361367
// Did last_used_at not update? Scratching your noggin? Here's why.
362368
// Workspace usage tracking must be triggered manually in tests.
363369
// The vast majority of existing tests do not depend on last_used_at
@@ -517,7 +523,7 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can
517523
LoginRateLimit: options.LoginRateLimit,
518524
FilesRateLimit: options.FilesRateLimit,
519525
Authorizer: options.Authorizer,
520-
Telemetry: telemetry.NewNoop(),
526+
Telemetry: options.TelemetryReporter,
521527
TemplateScheduleStore: &templateScheduleStore,
522528
AccessControlStore: accessControlStore,
523529
TLSCertificates: options.TLSCertificates,

coderd/workspaceagents.go

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ import (
2323
"tailscale.com/tailcfg"
2424

2525
"cdr.dev/slog"
26+
"github.com/coder/websocket"
27+
2628
"github.com/coder/coder/v2/coderd/agentapi"
2729
"github.com/coder/coder/v2/coderd/database"
2830
"github.com/coder/coder/v2/coderd/database/db2sdk"
@@ -34,6 +36,7 @@ import (
3436
"github.com/coder/coder/v2/coderd/jwtutils"
3537
"github.com/coder/coder/v2/coderd/rbac"
3638
"github.com/coder/coder/v2/coderd/rbac/policy"
39+
"github.com/coder/coder/v2/coderd/telemetry"
3740
maputil "github.com/coder/coder/v2/coderd/util/maps"
3841
"github.com/coder/coder/v2/coderd/wspubsub"
3942
"github.com/coder/coder/v2/codersdk"
@@ -42,7 +45,6 @@ import (
4245
"github.com/coder/coder/v2/codersdk/wsjson"
4346
"github.com/coder/coder/v2/tailnet"
4447
"github.com/coder/coder/v2/tailnet/proto"
45-
"github.com/coder/websocket"
4648
)
4749

4850
// @Summary Get workspace agent by ID
@@ -1635,6 +1637,33 @@ func (api *API) tailnetRPCConn(rw http.ResponseWriter, r *http.Request) {
16351637
defer wsNetConn.Close()
16361638
defer conn.Close(websocket.StatusNormalClosure, "")
16371639

1640+
// Get user ID for telemetry
1641+
apiKey := httpmw.APIKey(r)
1642+
userID := apiKey.UserID.String()
1643+
1644+
// Store connection telemetry event
1645+
now := time.Now()
1646+
connectionTelemetryEvent := telemetry.UserTailnetConnection{
1647+
ConnectedAt: now,
1648+
DisconnectedAt: nil,
1649+
UserID: userID,
1650+
PeerID: peerID.String(),
1651+
DeviceID: nil,
1652+
DeviceOS: nil,
1653+
CoderDesktopVersion: nil,
1654+
}
1655+
api.Telemetry.Report(&telemetry.Snapshot{
1656+
UserTailnetConnections: []telemetry.UserTailnetConnection{connectionTelemetryEvent},
1657+
})
1658+
defer func() {
1659+
// Update telemetry event with disconnection time
1660+
disconnectTime := time.Now()
1661+
connectionTelemetryEvent.DisconnectedAt = &disconnectTime
1662+
api.Telemetry.Report(&telemetry.Snapshot{
1663+
UserTailnetConnections: []telemetry.UserTailnetConnection{connectionTelemetryEvent},
1664+
})
1665+
}()
1666+
16381667
go httpapi.Heartbeat(ctx, conn)
16391668
err = api.TailnetClientService.ServeClient(ctx, version, wsNetConn, tailnet.StreamID{
16401669
Name: "client",

coderd/workspaceagents_test.go

Lines changed: 90 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ import (
2929

3030
"cdr.dev/slog"
3131
"cdr.dev/slog/sloggers/slogtest"
32+
"github.com/coder/quartz"
33+
"github.com/coder/websocket"
34+
3235
"github.com/coder/coder/v2/agent"
3336
"github.com/coder/coder/v2/agent/agentcontainers"
3437
"github.com/coder/coder/v2/agent/agentcontainers/acmock"
@@ -47,6 +50,7 @@ import (
4750
"github.com/coder/coder/v2/coderd/externalauth"
4851
"github.com/coder/coder/v2/coderd/jwtutils"
4952
"github.com/coder/coder/v2/coderd/rbac"
53+
"github.com/coder/coder/v2/coderd/telemetry"
5054
"github.com/coder/coder/v2/codersdk"
5155
"github.com/coder/coder/v2/codersdk/agentsdk"
5256
"github.com/coder/coder/v2/codersdk/workspacesdk"
@@ -56,8 +60,6 @@ import (
5660
tailnetproto "github.com/coder/coder/v2/tailnet/proto"
5761
"github.com/coder/coder/v2/tailnet/tailnettest"
5862
"github.com/coder/coder/v2/testutil"
59-
"github.com/coder/quartz"
60-
"github.com/coder/websocket"
6163
)
6264

6365
func TestWorkspaceAgent(t *testing.T) {
@@ -2133,35 +2135,54 @@ func TestOwnedWorkspacesCoordinate(t *testing.T) {
21332135

21342136
ctx := testutil.Context(t, testutil.WaitLong)
21352137
logger := testutil.Logger(t)
2138+
2139+
fTelemetry := newFakeTelemetryReporter(ctx, t, 200)
21362140
firstClient, _, api := coderdtest.NewWithAPI(t, &coderdtest.Options{
2137-
Coordinator: tailnet.NewCoordinator(logger),
2141+
Coordinator: tailnet.NewCoordinator(logger),
2142+
TelemetryReporter: fTelemetry,
21382143
})
21392144
firstUser := coderdtest.CreateFirstUser(t, firstClient)
21402145
member, memberUser := coderdtest.CreateAnotherUser(t, firstClient, firstUser.OrganizationID, rbac.RoleTemplateAdmin())
21412146

21422147
// Create a workspace with an agent
21432148
firstWorkspace := buildWorkspaceWithAgent(t, member, firstUser.OrganizationID, memberUser.ID, api.Database, api.Pubsub)
21442149

2150+
// clean out telemetry snapshots from the setup; we don't care about them for this test
2151+
for len(fTelemetry.snapshots) != 0 {
2152+
<-fTelemetry.snapshots
2153+
}
2154+
21452155
u, err := member.URL.Parse("/api/v2/tailnet")
21462156
require.NoError(t, err)
21472157
q := u.Query()
21482158
q.Set("version", "2.0")
21492159
u.RawQuery = q.Encode()
21502160

2161+
predialTime := time.Now()
2162+
21512163
//nolint:bodyclose // websocket package closes this for you
21522164
wsConn, resp, err := websocket.Dial(ctx, u.String(), &websocket.DialOptions{
21532165
HTTPHeader: http.Header{
21542166
"Coder-Session-Token": []string{member.SessionToken()},
21552167
},
21562168
})
21572169
if err != nil {
2158-
if resp.StatusCode != http.StatusSwitchingProtocols {
2170+
if resp != nil && resp.StatusCode != http.StatusSwitchingProtocols {
21592171
err = codersdk.ReadBodyAsError(resp)
21602172
}
21612173
require.NoError(t, err)
21622174
}
21632175
defer wsConn.Close(websocket.StatusNormalClosure, "done")
21642176

2177+
// Check telemetry
2178+
snapshot := testutil.RequireRecvCtx(ctx, t, fTelemetry.snapshots)
2179+
require.Len(t, snapshot.UserTailnetConnections, 1)
2180+
telemetryConnection := snapshot.UserTailnetConnections[0]
2181+
require.Equal(t, memberUser.ID.String(), telemetryConnection.UserID)
2182+
require.GreaterOrEqual(t, telemetryConnection.ConnectedAt, predialTime)
2183+
require.LessOrEqual(t, telemetryConnection.ConnectedAt, time.Now())
2184+
require.NotEmpty(t, telemetryConnection.PeerID)
2185+
21652186
rpcClient, err := tailnet.NewDRPCClient(
21662187
websocket.NetConn(ctx, wsConn, websocket.MessageBinary),
21672188
logger,
@@ -2209,6 +2230,23 @@ func TestOwnedWorkspacesCoordinate(t *testing.T) {
22092230
NumAgents: 0,
22102231
},
22112232
})
2233+
err = stream.Close()
2234+
require.NoError(t, err)
2235+
2236+
beforeDisconnectTime := time.Now()
2237+
err = wsConn.Close(websocket.StatusNormalClosure, "done")
2238+
require.NoError(t, err)
2239+
2240+
snapshot = testutil.RequireRecvCtx(ctx, t, fTelemetry.snapshots)
2241+
require.Len(t, snapshot.UserTailnetConnections, 1)
2242+
telemetryDisconnection := snapshot.UserTailnetConnections[0]
2243+
require.Equal(t, memberUser.ID.String(), telemetryDisconnection.UserID)
2244+
require.Equal(t, telemetryConnection.ConnectedAt, telemetryDisconnection.ConnectedAt)
2245+
require.Equal(t, telemetryConnection.UserID, telemetryDisconnection.UserID)
2246+
require.Equal(t, telemetryConnection.PeerID, telemetryDisconnection.PeerID)
2247+
require.NotNil(t, telemetryDisconnection.DisconnectedAt)
2248+
require.GreaterOrEqual(t, *telemetryDisconnection.DisconnectedAt, beforeDisconnectTime)
2249+
require.LessOrEqual(t, *telemetryDisconnection.DisconnectedAt, time.Now())
22122250
}
22132251

22142252
func buildWorkspaceWithAgent(
@@ -2334,3 +2372,51 @@ func waitForUpdates(
23342372
t.Fatal("Timeout waiting for desired state", currentState)
23352373
}
23362374
}
2375+
2376+
// fakeTelemetryReporter is a fake implementation of telemetry.Reporter
2377+
// that sends snapshots on a buffered channel, useful for testing.
2378+
type fakeTelemetryReporter struct {
2379+
enabled bool
2380+
snapshots chan *telemetry.Snapshot
2381+
t testing.TB
2382+
ctx context.Context
2383+
}
2384+
2385+
// newFakeTelemetryReporter creates a new fakeTelemetryReporter with a buffered channel.
2386+
// The buffer size determines how many snapshots can be reported before blocking.
2387+
func newFakeTelemetryReporter(ctx context.Context, t testing.TB, bufferSize int) *fakeTelemetryReporter {
2388+
return &fakeTelemetryReporter{
2389+
enabled: true,
2390+
snapshots: make(chan *telemetry.Snapshot, bufferSize),
2391+
ctx: ctx,
2392+
t: t,
2393+
}
2394+
}
2395+
2396+
// Report implements the telemetry.Reporter interface by sending the snapshot
2397+
// to the snapshots channel.
2398+
func (f *fakeTelemetryReporter) Report(snapshot *telemetry.Snapshot) {
2399+
if !f.enabled {
2400+
return
2401+
}
2402+
2403+
select {
2404+
case f.snapshots <- snapshot:
2405+
// Successfully sent
2406+
case <-f.ctx.Done():
2407+
f.t.Error("context closed while writing snapshot")
2408+
}
2409+
}
2410+
2411+
// Enabled implements the telemetry.Reporter interface.
2412+
func (f *fakeTelemetryReporter) Enabled() bool {
2413+
return f.enabled
2414+
}
2415+
2416+
// SetEnabled allows controlling whether the reporter is enabled.
2417+
func (f *fakeTelemetryReporter) SetEnabled(enabled bool) {
2418+
f.enabled = enabled
2419+
}
2420+
2421+
// Close implements the telemetry.Reporter interface.
2422+
func (f *fakeTelemetryReporter) Close() {}

0 commit comments

Comments
 (0)