Skip to content

Commit e0ecc28

Browse files
authored
feat: add telemetry to user-scoped tailnet API call (coder#17065)
Adds support for sending telemetry on calls to the User-scoped tailnet RPC endpoint. This is currently used only by Coder Desktop. Later PRs will fill in the version, OS information, and device ID via HTTP headers.
1 parent 6bf22f8 commit e0ecc28

File tree

3 files changed

+122
-7
lines changed

3 files changed

+122
-7
lines changed

coderd/coderdtest/coderdtest.go

+8-2
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ import (
5252
"cdr.dev/slog"
5353
"cdr.dev/slog/sloggers/sloghuman"
5454
"cdr.dev/slog/sloggers/slogtest"
55+
"github.com/coder/quartz"
56+
5557
"github.com/coder/coder/v2/coderd"
5658
"github.com/coder/coder/v2/coderd/audit"
5759
"github.com/coder/coder/v2/coderd/autobuild"
@@ -91,7 +93,6 @@ import (
9193
sdkproto "github.com/coder/coder/v2/provisionersdk/proto"
9294
"github.com/coder/coder/v2/tailnet"
9395
"github.com/coder/coder/v2/testutil"
94-
"github.com/coder/quartz"
9596
)
9697

9798
type Options struct {
@@ -170,6 +171,7 @@ type Options struct {
170171
APIKeyEncryptionCache cryptokeys.EncryptionKeycache
171172
OIDCConvertKeyCache cryptokeys.SigningKeycache
172173
Clock quartz.Clock
174+
TelemetryReporter telemetry.Reporter
173175
}
174176

175177
// New constructs a codersdk client connected to an in-memory API instance.
@@ -358,6 +360,10 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can
358360
hangDetector.Start()
359361
t.Cleanup(hangDetector.Close)
360362

363+
if options.TelemetryReporter == nil {
364+
options.TelemetryReporter = telemetry.NewNoop()
365+
}
366+
361367
// Did last_used_at not update? Scratching your noggin? Here's why.
362368
// Workspace usage tracking must be triggered manually in tests.
363369
// The vast majority of existing tests do not depend on last_used_at
@@ -517,7 +523,7 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can
517523
LoginRateLimit: options.LoginRateLimit,
518524
FilesRateLimit: options.FilesRateLimit,
519525
Authorizer: options.Authorizer,
520-
Telemetry: telemetry.NewNoop(),
526+
Telemetry: options.TelemetryReporter,
521527
TemplateScheduleStore: &templateScheduleStore,
522528
AccessControlStore: accessControlStore,
523529
TLSCertificates: options.TLSCertificates,

coderd/workspaceagents.go

+30-1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ import (
2323
"tailscale.com/tailcfg"
2424

2525
"cdr.dev/slog"
26+
"github.com/coder/websocket"
27+
2628
"github.com/coder/coder/v2/coderd/agentapi"
2729
"github.com/coder/coder/v2/coderd/database"
2830
"github.com/coder/coder/v2/coderd/database/db2sdk"
@@ -34,6 +36,7 @@ import (
3436
"github.com/coder/coder/v2/coderd/jwtutils"
3537
"github.com/coder/coder/v2/coderd/rbac"
3638
"github.com/coder/coder/v2/coderd/rbac/policy"
39+
"github.com/coder/coder/v2/coderd/telemetry"
3740
maputil "github.com/coder/coder/v2/coderd/util/maps"
3841
"github.com/coder/coder/v2/coderd/wspubsub"
3942
"github.com/coder/coder/v2/codersdk"
@@ -42,7 +45,6 @@ import (
4245
"github.com/coder/coder/v2/codersdk/wsjson"
4346
"github.com/coder/coder/v2/tailnet"
4447
"github.com/coder/coder/v2/tailnet/proto"
45-
"github.com/coder/websocket"
4648
)
4749

4850
// @Summary Get workspace agent by ID
@@ -1635,6 +1637,33 @@ func (api *API) tailnetRPCConn(rw http.ResponseWriter, r *http.Request) {
16351637
defer wsNetConn.Close()
16361638
defer conn.Close(websocket.StatusNormalClosure, "")
16371639

1640+
// Get user ID for telemetry
1641+
apiKey := httpmw.APIKey(r)
1642+
userID := apiKey.UserID.String()
1643+
1644+
// Store connection telemetry event
1645+
now := time.Now()
1646+
connectionTelemetryEvent := telemetry.UserTailnetConnection{
1647+
ConnectedAt: now,
1648+
DisconnectedAt: nil,
1649+
UserID: userID,
1650+
PeerID: peerID.String(),
1651+
DeviceID: nil,
1652+
DeviceOS: nil,
1653+
CoderDesktopVersion: nil,
1654+
}
1655+
api.Telemetry.Report(&telemetry.Snapshot{
1656+
UserTailnetConnections: []telemetry.UserTailnetConnection{connectionTelemetryEvent},
1657+
})
1658+
defer func() {
1659+
// Update telemetry event with disconnection time
1660+
disconnectTime := time.Now()
1661+
connectionTelemetryEvent.DisconnectedAt = &disconnectTime
1662+
api.Telemetry.Report(&telemetry.Snapshot{
1663+
UserTailnetConnections: []telemetry.UserTailnetConnection{connectionTelemetryEvent},
1664+
})
1665+
}()
1666+
16381667
go httpapi.Heartbeat(ctx, conn)
16391668
err = api.TailnetClientService.ServeClient(ctx, version, wsNetConn, tailnet.StreamID{
16401669
Name: "client",

coderd/workspaceagents_test.go

+84-4
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ import (
2929

3030
"cdr.dev/slog"
3131
"cdr.dev/slog/sloggers/slogtest"
32+
"github.com/coder/quartz"
33+
"github.com/coder/websocket"
34+
3235
"github.com/coder/coder/v2/agent"
3336
"github.com/coder/coder/v2/agent/agentcontainers"
3437
"github.com/coder/coder/v2/agent/agentcontainers/acmock"
@@ -47,6 +50,7 @@ import (
4750
"github.com/coder/coder/v2/coderd/externalauth"
4851
"github.com/coder/coder/v2/coderd/jwtutils"
4952
"github.com/coder/coder/v2/coderd/rbac"
53+
"github.com/coder/coder/v2/coderd/telemetry"
5054
"github.com/coder/coder/v2/codersdk"
5155
"github.com/coder/coder/v2/codersdk/agentsdk"
5256
"github.com/coder/coder/v2/codersdk/workspacesdk"
@@ -56,8 +60,6 @@ import (
5660
tailnetproto "github.com/coder/coder/v2/tailnet/proto"
5761
"github.com/coder/coder/v2/tailnet/tailnettest"
5862
"github.com/coder/coder/v2/testutil"
59-
"github.com/coder/quartz"
60-
"github.com/coder/websocket"
6163
)
6264

6365
func TestWorkspaceAgent(t *testing.T) {
@@ -2133,35 +2135,53 @@ func TestOwnedWorkspacesCoordinate(t *testing.T) {
21332135

21342136
ctx := testutil.Context(t, testutil.WaitLong)
21352137
logger := testutil.Logger(t)
2138+
2139+
fTelemetry := newFakeTelemetryReporter(ctx, t, 200)
2140+
fTelemetry.enabled = false
21362141
firstClient, _, api := coderdtest.NewWithAPI(t, &coderdtest.Options{
2137-
Coordinator: tailnet.NewCoordinator(logger),
2142+
Coordinator: tailnet.NewCoordinator(logger),
2143+
TelemetryReporter: fTelemetry,
21382144
})
21392145
firstUser := coderdtest.CreateFirstUser(t, firstClient)
21402146
member, memberUser := coderdtest.CreateAnotherUser(t, firstClient, firstUser.OrganizationID, rbac.RoleTemplateAdmin())
21412147

21422148
// Create a workspace with an agent
21432149
firstWorkspace := buildWorkspaceWithAgent(t, member, firstUser.OrganizationID, memberUser.ID, api.Database, api.Pubsub)
21442150

2151+
// enable telemetry now that workspace is built; we don't care about snapshots before this.
2152+
fTelemetry.enabled = true
2153+
21452154
u, err := member.URL.Parse("/api/v2/tailnet")
21462155
require.NoError(t, err)
21472156
q := u.Query()
21482157
q.Set("version", "2.0")
21492158
u.RawQuery = q.Encode()
21502159

2160+
predialTime := time.Now()
2161+
21512162
//nolint:bodyclose // websocket package closes this for you
21522163
wsConn, resp, err := websocket.Dial(ctx, u.String(), &websocket.DialOptions{
21532164
HTTPHeader: http.Header{
21542165
"Coder-Session-Token": []string{member.SessionToken()},
21552166
},
21562167
})
21572168
if err != nil {
2158-
if resp.StatusCode != http.StatusSwitchingProtocols {
2169+
if resp != nil && resp.StatusCode != http.StatusSwitchingProtocols {
21592170
err = codersdk.ReadBodyAsError(resp)
21602171
}
21612172
require.NoError(t, err)
21622173
}
21632174
defer wsConn.Close(websocket.StatusNormalClosure, "done")
21642175

2176+
// Check telemetry
2177+
snapshot := testutil.RequireRecvCtx(ctx, t, fTelemetry.snapshots)
2178+
require.Len(t, snapshot.UserTailnetConnections, 1)
2179+
telemetryConnection := snapshot.UserTailnetConnections[0]
2180+
require.Equal(t, memberUser.ID.String(), telemetryConnection.UserID)
2181+
require.GreaterOrEqual(t, telemetryConnection.ConnectedAt, predialTime)
2182+
require.LessOrEqual(t, telemetryConnection.ConnectedAt, time.Now())
2183+
require.NotEmpty(t, telemetryConnection.PeerID)
2184+
21652185
rpcClient, err := tailnet.NewDRPCClient(
21662186
websocket.NetConn(ctx, wsConn, websocket.MessageBinary),
21672187
logger,
@@ -2209,6 +2229,23 @@ func TestOwnedWorkspacesCoordinate(t *testing.T) {
22092229
NumAgents: 0,
22102230
},
22112231
})
2232+
err = stream.Close()
2233+
require.NoError(t, err)
2234+
2235+
beforeDisconnectTime := time.Now()
2236+
err = wsConn.Close(websocket.StatusNormalClosure, "done")
2237+
require.NoError(t, err)
2238+
2239+
snapshot = testutil.RequireRecvCtx(ctx, t, fTelemetry.snapshots)
2240+
require.Len(t, snapshot.UserTailnetConnections, 1)
2241+
telemetryDisconnection := snapshot.UserTailnetConnections[0]
2242+
require.Equal(t, memberUser.ID.String(), telemetryDisconnection.UserID)
2243+
require.Equal(t, telemetryConnection.ConnectedAt, telemetryDisconnection.ConnectedAt)
2244+
require.Equal(t, telemetryConnection.UserID, telemetryDisconnection.UserID)
2245+
require.Equal(t, telemetryConnection.PeerID, telemetryDisconnection.PeerID)
2246+
require.NotNil(t, telemetryDisconnection.DisconnectedAt)
2247+
require.GreaterOrEqual(t, *telemetryDisconnection.DisconnectedAt, beforeDisconnectTime)
2248+
require.LessOrEqual(t, *telemetryDisconnection.DisconnectedAt, time.Now())
22122249
}
22132250

22142251
func buildWorkspaceWithAgent(
@@ -2334,3 +2371,46 @@ func waitForUpdates(
23342371
t.Fatal("Timeout waiting for desired state", currentState)
23352372
}
23362373
}
2374+
2375+
// fakeTelemetryReporter is a fake implementation of telemetry.Reporter
2376+
// that sends snapshots on a buffered channel, useful for testing.
2377+
type fakeTelemetryReporter struct {
2378+
enabled bool
2379+
snapshots chan *telemetry.Snapshot
2380+
t testing.TB
2381+
ctx context.Context
2382+
}
2383+
2384+
// newFakeTelemetryReporter creates a new fakeTelemetryReporter with a buffered channel.
2385+
// The buffer size determines how many snapshots can be reported before blocking.
2386+
func newFakeTelemetryReporter(ctx context.Context, t testing.TB, bufferSize int) *fakeTelemetryReporter {
2387+
return &fakeTelemetryReporter{
2388+
enabled: true,
2389+
snapshots: make(chan *telemetry.Snapshot, bufferSize),
2390+
ctx: ctx,
2391+
t: t,
2392+
}
2393+
}
2394+
2395+
// Report implements the telemetry.Reporter interface by sending the snapshot
2396+
// to the snapshots channel.
2397+
func (f *fakeTelemetryReporter) Report(snapshot *telemetry.Snapshot) {
2398+
if !f.enabled {
2399+
return
2400+
}
2401+
2402+
select {
2403+
case f.snapshots <- snapshot:
2404+
// Successfully sent
2405+
case <-f.ctx.Done():
2406+
f.t.Error("context closed while writing snapshot")
2407+
}
2408+
}
2409+
2410+
// Enabled implements the telemetry.Reporter interface.
2411+
func (f *fakeTelemetryReporter) Enabled() bool {
2412+
return f.enabled
2413+
}
2414+
2415+
// Close implements the telemetry.Reporter interface.
2416+
func (*fakeTelemetryReporter) Close() {}

0 commit comments

Comments
 (0)