Skip to content

fix(coderd): workspaceapps: update last_used_at when workspace app reports stats #11603

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jan 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions coderd/database/dbauthz/dbauthz.go
Original file line number Diff line number Diff line change
Expand Up @@ -695,6 +695,15 @@ func (q *querier) ArchiveUnusedTemplateVersions(ctx context.Context, arg databas
return q.db.ArchiveUnusedTemplateVersions(ctx, arg)
}

func (q *querier) BatchUpdateWorkspaceLastUsedAt(ctx context.Context, arg database.BatchUpdateWorkspaceLastUsedAtParams) error {
// Could be any workspace and checking auth to each workspace is overkill for the purpose
// of this function.
if err := q.authorizeContext(ctx, rbac.ActionUpdate, rbac.ResourceWorkspace.All()); err != nil {
return err
}
return q.db.BatchUpdateWorkspaceLastUsedAt(ctx, arg)
}

func (q *querier) CleanTailnetCoordinators(ctx context.Context) error {
if err := q.authorizeContext(ctx, rbac.ActionDelete, rbac.ResourceTailnetCoordinator); err != nil {
return err
Expand Down
7 changes: 7 additions & 0 deletions coderd/database/dbauthz/dbauthz_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1549,6 +1549,13 @@ func (s *MethodTestSuite) TestWorkspace() {
ID: ws.ID,
}).Asserts(ws, rbac.ActionUpdate).Returns()
}))
s.Run("BatchUpdateWorkspaceLastUsedAt", s.Subtest(func(db database.Store, check *expects) {
ws1 := dbgen.Workspace(s.T(), db, database.Workspace{})
ws2 := dbgen.Workspace(s.T(), db, database.Workspace{})
check.Args(database.BatchUpdateWorkspaceLastUsedAtParams{
IDs: []uuid.UUID{ws1.ID, ws2.ID},
}).Asserts(rbac.ResourceWorkspace.All(), rbac.ActionUpdate).Returns()
}))
s.Run("UpdateWorkspaceTTL", s.Subtest(func(db database.Store, check *expects) {
ws := dbgen.Workspace(s.T(), db, database.Workspace{})
check.Args(database.UpdateWorkspaceTTLParams{
Expand Down
25 changes: 25 additions & 0 deletions coderd/database/dbmem/dbmem.go
Original file line number Diff line number Diff line change
Expand Up @@ -963,6 +963,31 @@ func (q *FakeQuerier) ArchiveUnusedTemplateVersions(_ context.Context, arg datab
return archived, nil
}

func (q *FakeQuerier) BatchUpdateWorkspaceLastUsedAt(_ context.Context, arg database.BatchUpdateWorkspaceLastUsedAtParams) error {
err := validateDatabaseType(arg)
if err != nil {
return err
}

q.mutex.Lock()
defer q.mutex.Unlock()

// temporary map to avoid O(q.workspaces*arg.workspaceIds)
m := make(map[uuid.UUID]struct{})
for _, id := range arg.IDs {
m[id] = struct{}{}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could be before the mutex lock, but it's such a minor perf change not sure it's worth changing.

}
n := 0
for i := 0; i < len(q.workspaces); i++ {
if _, found := m[q.workspaces[i].ID]; !found {
continue
}
q.workspaces[i].LastUsedAt = arg.LastUsedAt
n++
}
return nil
}

func (*FakeQuerier) CleanTailnetCoordinators(_ context.Context) error {
return ErrUnimplemented
}
Expand Down
7 changes: 7 additions & 0 deletions coderd/database/dbmetrics/dbmetrics.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions coderd/database/dbmock/dbmock.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions coderd/database/querier.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions coderd/database/queries.sql.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions coderd/database/queries/workspaces.sql
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,14 @@ SET
WHERE
id = $1;

-- name: BatchUpdateWorkspaceLastUsedAt :exec
UPDATE
workspaces
SET
last_used_at = @last_used_at
WHERE
id = ANY(@ids :: uuid[]);

-- name: GetDeploymentWorkspaceStats :one
WITH workspaces_with_jobs AS (
SELECT
Expand Down
3 changes: 2 additions & 1 deletion coderd/httpapi/websocket.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ import (
"context"
"time"

"cdr.dev/slog"
"nhooyr.io/websocket"

"cdr.dev/slog"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: is it expected or is the linter impaired?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be my linter.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is fine, it's how these are usually grouped (coder/coder, cdr.dev, etc grouped together, separate from stdlib and 3rd party).

)

// Heartbeat loops to ping a WebSocket to keep it alive.
Expand Down
2 changes: 1 addition & 1 deletion coderd/workspaceagents_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1626,7 +1626,7 @@ func TestWorkspaceAgentExternalAuthListen(t *testing.T) {
cancel()
// We expect only 1
// In a failed test, you will likely see 9, as the last one
// gets cancelled.
// gets canceled.
require.Equal(t, 1, validateCalls, "validate calls duplicated on same token")
})
}
56 changes: 48 additions & 8 deletions coderd/workspaceapps/apptest/apptest.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) {
// reconnecting-pty proxy server we want to test is mounted.
client := appDetails.AppClient(t)
testReconnectingPTY(ctx, t, client, appDetails.Agent.ID, "")
assertWorkspaceLastUsedAtUpdated(t, appDetails)
})

t.Run("SignedTokenQueryParameter", func(t *testing.T) {
Expand Down Expand Up @@ -92,6 +93,7 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) {
// Make an unauthenticated client.
unauthedAppClient := codersdk.New(appDetails.AppClient(t).URL)
testReconnectingPTY(ctx, t, unauthedAppClient, appDetails.Agent.ID, issueRes.SignedToken)
assertWorkspaceLastUsedAtUpdated(t, appDetails)
})
})

Expand All @@ -117,6 +119,9 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) {
body, err := io.ReadAll(resp.Body)
require.NoError(t, err)
require.Contains(t, string(body), "Path-based applications are disabled")
// Even though path-based apps are disabled, the request should indicate
// that the workspace was used.
assertWorkspaceLastUsedAtNotUpdated(t, appDetails)
})

t.Run("LoginWithoutAuthOnPrimary", func(t *testing.T) {
Expand All @@ -142,6 +147,7 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) {
require.NoError(t, err)
require.True(t, loc.Query().Has("message"))
require.True(t, loc.Query().Has("redirect"))
assertWorkspaceLastUsedAtUpdated(t, appDetails)
})

t.Run("LoginWithoutAuthOnProxy", func(t *testing.T) {
Expand Down Expand Up @@ -179,6 +185,7 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) {
// request is getting stripped.
require.Equal(t, u.Path, redirectURI.Path+"/")
require.Equal(t, u.RawQuery, redirectURI.RawQuery)
assertWorkspaceLastUsedAtUpdated(t, appDetails)
})

t.Run("NoAccessShould404", func(t *testing.T) {
Expand All @@ -195,6 +202,8 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) {
require.NoError(t, err)
defer resp.Body.Close()
require.Equal(t, http.StatusNotFound, resp.StatusCode)
// TODO(cian): A blocked request should not count as workspace usage.
// assertWorkspaceLastUsedAtNotUpdated(t, appDetails.AppClient(t), appDetails)
Comment on lines +205 to +206
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

review: leaving this here to address in a follow-up. As this is now tied to stats collection, we bump whenever there are app usage stats for a workspace.

})

t.Run("RedirectsWithSlash", func(t *testing.T) {
Expand All @@ -209,6 +218,8 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) {
require.NoError(t, err)
defer resp.Body.Close()
require.Equal(t, http.StatusTemporaryRedirect, resp.StatusCode)
// TODO(cian): The initial redirect should not count as workspace usage.
// assertWorkspaceLastUsedAtNotUpdated(t, appDetails.AppClient(t), appDetails)
Comment on lines +221 to +222
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

review: follow-up

})

t.Run("RedirectsWithQuery", func(t *testing.T) {
Expand All @@ -226,6 +237,8 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) {
loc, err := resp.Location()
require.NoError(t, err)
require.Equal(t, proxyTestAppQuery, loc.RawQuery)
// TODO(cian): The initial redirect should not count as workspace usage.
// assertWorkspaceLastUsedAtNotUpdated(t, appDetails.AppClient(t), appDetails)
Comment on lines +240 to +241
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

review: follow-up

})

t.Run("Proxies", func(t *testing.T) {
Expand Down Expand Up @@ -267,6 +280,7 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) {
require.NoError(t, err)
require.Equal(t, proxyTestAppBody, string(body))
require.Equal(t, http.StatusOK, resp.StatusCode)
assertWorkspaceLastUsedAtUpdated(t, appDetails)
})

t.Run("ProxiesHTTPS", func(t *testing.T) {
Expand Down Expand Up @@ -312,6 +326,7 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) {
require.NoError(t, err)
require.Equal(t, proxyTestAppBody, string(body))
require.Equal(t, http.StatusOK, resp.StatusCode)
assertWorkspaceLastUsedAtUpdated(t, appDetails)
})

t.Run("BlocksMe", func(t *testing.T) {
Expand All @@ -331,6 +346,8 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) {
body, err := io.ReadAll(resp.Body)
require.NoError(t, err)
require.Contains(t, string(body), "must be accessed with the full username, not @me")
// TODO(cian): A blocked request should not count as workspace usage.
// assertWorkspaceLastUsedAtNotUpdated(t, appDetails.AppClient(t), appDetails)
Comment on lines +349 to +350
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

review: follow-up

})

t.Run("ForwardsIP", func(t *testing.T) {
Expand All @@ -349,6 +366,7 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) {
require.Equal(t, proxyTestAppBody, string(body))
require.Equal(t, http.StatusOK, resp.StatusCode)
require.Equal(t, "1.1.1.1,127.0.0.1", resp.Header.Get("X-Forwarded-For"))
assertWorkspaceLastUsedAtUpdated(t, appDetails)
})

t.Run("ProxyError", func(t *testing.T) {
Expand All @@ -361,6 +379,9 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) {
require.NoError(t, err)
defer resp.Body.Close()
require.Equal(t, http.StatusBadGateway, resp.StatusCode)
// An valid authenticated attempt to access a workspace app
// should count as usage regardless of success.
assertWorkspaceLastUsedAtUpdated(t, appDetails)
})

t.Run("NoProxyPort", func(t *testing.T) {
Expand All @@ -375,6 +396,7 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) {
// TODO(@deansheather): This should be 400. There's a todo in the
// resolve request code to fix this.
require.Equal(t, http.StatusInternalServerError, resp.StatusCode)
assertWorkspaceLastUsedAtUpdated(t, appDetails)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

review: I'm counting this as a successful and valid attempt to access a workspace.

})
})

Expand Down Expand Up @@ -1430,16 +1452,12 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) {
t.Run("ReportStats", func(t *testing.T) {
t.Parallel()

flush := make(chan chan<- struct{}, 1)

reporter := &fakeStatsReporter{}
appDetails := setupProxyTest(t, &DeploymentOptions{
StatsCollectorOptions: workspaceapps.StatsCollectorOptions{
Reporter: reporter,
ReportInterval: time.Hour,
RollupWindow: time.Minute,

Flush: flush,
},
})

Expand All @@ -1457,10 +1475,7 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) {
var stats []workspaceapps.StatsReport
require.Eventually(t, func() bool {
// Keep flushing until we get a non-empty stats report.
flushDone := make(chan struct{}, 1)
flush <- flushDone
<-flushDone

appDetails.FlushStats()
stats = reporter.stats()
return len(stats) > 0
}, testutil.WaitLong, testutil.IntervalFast, "stats not reported")
Expand Down Expand Up @@ -1549,3 +1564,28 @@ func testReconnectingPTY(ctx context.Context, t *testing.T, client *codersdk.Cli
// Ensure the connection closes.
require.ErrorIs(t, tr.ReadUntil(ctx, nil), io.EOF)
}

// Accessing an app should update the workspace's LastUsedAt.
// NOTE: Despite our efforts with the flush channel, this is inherently racy.
func assertWorkspaceLastUsedAtUpdated(t testing.TB, details *Details) {
t.Helper()

// Wait for stats to fully flush.
require.Eventually(t, func() bool {
details.FlushStats()
ws, err := details.SDKClient.Workspace(context.Background(), details.Workspace.ID)
assert.NoError(t, err)
return ws.LastUsedAt.After(details.Workspace.LastUsedAt)
}, testutil.WaitShort, testutil.IntervalMedium, "workspace LastUsedAt not updated when it should have been")
}

// Except when it sometimes shouldn't (e.g. no access)
// NOTE: Despite our efforts with the flush channel, this is inherently racy.
func assertWorkspaceLastUsedAtNotUpdated(t testing.TB, details *Details) {
t.Helper()

details.FlushStats()
ws, err := details.SDKClient.Workspace(context.Background(), details.Workspace.ID)
require.NoError(t, err)
require.Equal(t, ws.LastUsedAt, details.Workspace.LastUsedAt, "workspace LastUsedAt updated when it should not have been")
}
1 change: 1 addition & 0 deletions coderd/workspaceapps/apptest/setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ type Deployment struct {
SDKClient *codersdk.Client
FirstUser codersdk.CreateFirstUserResponse
PathAppBaseURL *url.URL
FlushStats func()
}

// DeploymentFactory generates a deployment with an API client, a path base URL,
Expand Down
26 changes: 21 additions & 5 deletions coderd/workspaceapps/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/dbauthz"
"github.com/coder/coder/v2/coderd/database/dbtime"
"github.com/coder/coder/v2/coderd/util/slice"
)

const (
Expand Down Expand Up @@ -117,11 +118,25 @@ func (r *StatsDBReporter) Report(ctx context.Context, stats []StatsReport) error
batch.Requests = batch.Requests[:0]
}
}
if len(batch.UserID) > 0 {
err := tx.InsertWorkspaceAppStats(ctx, batch)
if err != nil {
return err
}
if len(batch.UserID) == 0 {
return nil
}

if err := tx.InsertWorkspaceAppStats(ctx, batch); err != nil {
return err
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One could make a case for us trying to do the last used update even if this fails, wdyt?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inserting app stats is just doing a big insert into a single table (which IIRC is unlogged), so if we run into issues doing that my gut tells me that any further database queries might not be successful. Might not hurt to try, but I'm not sure how much the extra complexity would be worth it. Bear in mind that we will end up just trying to do this again in another 30 seconds!

}

// TODO: We currently measure workspace usage based on when we get stats from it.
// There are currently two paths for this:
// 1) From SSH -> workspace agent stats POSTed from agent
// 2) From workspace apps / rpty -> workspace app stats (from coderd / wsproxy)
// Ideally we would have a single code path for this.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could perhaps have a single endpoint, but I think it's unlikely we can have all the data originate from one place. At least not whilst retaining the behavior where trying to access a workspace results in usage.

In the single endpoint scenario, we'd still have both agent and wsproxy pinging the endpoint based on usage.

uniqueIDs := slice.Unique(batch.WorkspaceID)
if err := tx.BatchUpdateWorkspaceLastUsedAt(ctx, database.BatchUpdateWorkspaceLastUsedAtParams{
IDs: uniqueIDs,
LastUsedAt: dbtime.Now(), // This isn't 100% accurate, but it's good enough.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO, it's close enough. We could ensure we use max time from the stats if we want slightly more accuracy (still off for some workspaces, though).

}); err != nil {
return err
}

return nil
Expand Down Expand Up @@ -234,6 +249,7 @@ func (sc *StatsCollector) Collect(report StatsReport) {
}
delete(sc.statsBySessionID, report.SessionID)
}
sc.opts.Logger.Debug(sc.ctx, "collected workspace app stats", slog.F("report", report))
}

// rollup performs stats rollup for sessions that fall within the
Expand Down
Loading