-
Notifications
You must be signed in to change notification settings - Fork 928
fix(coderd): workspaceapps: update last_used_at when workspace app reports stats #11603
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
39b846f
d858bac
07176ef
ef8d3ab
601136c
184af27
1f9012e
862b1ff
f522ea6
b714d69
8573ba7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,8 +4,9 @@ import ( | |
"context" | ||
"time" | ||
|
||
"cdr.dev/slog" | ||
"nhooyr.io/websocket" | ||
|
||
"cdr.dev/slog" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: is it expected or is the linter impaired? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Might be my linter. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is fine, it's how these are usually grouped (coder/coder, cdr.dev, etc grouped together, separate from stdlib and 3rd party). |
||
) | ||
|
||
// Heartbeat loops to ping a WebSocket to keep it alive. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -64,6 +64,7 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) { | |
// reconnecting-pty proxy server we want to test is mounted. | ||
client := appDetails.AppClient(t) | ||
testReconnectingPTY(ctx, t, client, appDetails.Agent.ID, "") | ||
assertWorkspaceLastUsedAtUpdated(t, appDetails) | ||
}) | ||
|
||
t.Run("SignedTokenQueryParameter", func(t *testing.T) { | ||
|
@@ -92,6 +93,7 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) { | |
// Make an unauthenticated client. | ||
unauthedAppClient := codersdk.New(appDetails.AppClient(t).URL) | ||
testReconnectingPTY(ctx, t, unauthedAppClient, appDetails.Agent.ID, issueRes.SignedToken) | ||
assertWorkspaceLastUsedAtUpdated(t, appDetails) | ||
}) | ||
}) | ||
|
||
|
@@ -117,6 +119,9 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) { | |
body, err := io.ReadAll(resp.Body) | ||
require.NoError(t, err) | ||
require.Contains(t, string(body), "Path-based applications are disabled") | ||
// Even though path-based apps are disabled, the request should indicate | ||
// that the workspace was used. | ||
assertWorkspaceLastUsedAtNotUpdated(t, appDetails) | ||
}) | ||
|
||
t.Run("LoginWithoutAuthOnPrimary", func(t *testing.T) { | ||
|
@@ -142,6 +147,7 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) { | |
require.NoError(t, err) | ||
require.True(t, loc.Query().Has("message")) | ||
require.True(t, loc.Query().Has("redirect")) | ||
assertWorkspaceLastUsedAtUpdated(t, appDetails) | ||
}) | ||
|
||
t.Run("LoginWithoutAuthOnProxy", func(t *testing.T) { | ||
|
@@ -179,6 +185,7 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) { | |
// request is getting stripped. | ||
require.Equal(t, u.Path, redirectURI.Path+"/") | ||
require.Equal(t, u.RawQuery, redirectURI.RawQuery) | ||
assertWorkspaceLastUsedAtUpdated(t, appDetails) | ||
}) | ||
|
||
t.Run("NoAccessShould404", func(t *testing.T) { | ||
|
@@ -195,6 +202,8 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) { | |
require.NoError(t, err) | ||
defer resp.Body.Close() | ||
require.Equal(t, http.StatusNotFound, resp.StatusCode) | ||
// TODO(cian): A blocked request should not count as workspace usage. | ||
// assertWorkspaceLastUsedAtNotUpdated(t, appDetails.AppClient(t), appDetails) | ||
Comment on lines
+205
to
+206
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. review: leaving this here to address in a follow-up. As this is now tied to stats collection, we bump whenever there are app usage stats for a workspace. |
||
}) | ||
|
||
t.Run("RedirectsWithSlash", func(t *testing.T) { | ||
|
@@ -209,6 +218,8 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) { | |
require.NoError(t, err) | ||
defer resp.Body.Close() | ||
require.Equal(t, http.StatusTemporaryRedirect, resp.StatusCode) | ||
// TODO(cian): The initial redirect should not count as workspace usage. | ||
// assertWorkspaceLastUsedAtNotUpdated(t, appDetails.AppClient(t), appDetails) | ||
Comment on lines
+221
to
+222
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. review: follow-up |
||
}) | ||
|
||
t.Run("RedirectsWithQuery", func(t *testing.T) { | ||
|
@@ -226,6 +237,8 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) { | |
loc, err := resp.Location() | ||
require.NoError(t, err) | ||
require.Equal(t, proxyTestAppQuery, loc.RawQuery) | ||
// TODO(cian): The initial redirect should not count as workspace usage. | ||
// assertWorkspaceLastUsedAtNotUpdated(t, appDetails.AppClient(t), appDetails) | ||
Comment on lines
+240
to
+241
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. review: follow-up |
||
}) | ||
|
||
t.Run("Proxies", func(t *testing.T) { | ||
|
@@ -267,6 +280,7 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) { | |
require.NoError(t, err) | ||
require.Equal(t, proxyTestAppBody, string(body)) | ||
require.Equal(t, http.StatusOK, resp.StatusCode) | ||
assertWorkspaceLastUsedAtUpdated(t, appDetails) | ||
}) | ||
|
||
t.Run("ProxiesHTTPS", func(t *testing.T) { | ||
|
@@ -312,6 +326,7 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) { | |
require.NoError(t, err) | ||
require.Equal(t, proxyTestAppBody, string(body)) | ||
require.Equal(t, http.StatusOK, resp.StatusCode) | ||
assertWorkspaceLastUsedAtUpdated(t, appDetails) | ||
}) | ||
|
||
t.Run("BlocksMe", func(t *testing.T) { | ||
|
@@ -331,6 +346,8 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) { | |
body, err := io.ReadAll(resp.Body) | ||
require.NoError(t, err) | ||
require.Contains(t, string(body), "must be accessed with the full username, not @me") | ||
// TODO(cian): A blocked request should not count as workspace usage. | ||
// assertWorkspaceLastUsedAtNotUpdated(t, appDetails.AppClient(t), appDetails) | ||
Comment on lines
+349
to
+350
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. review: follow-up |
||
}) | ||
|
||
t.Run("ForwardsIP", func(t *testing.T) { | ||
|
@@ -349,6 +366,7 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) { | |
require.Equal(t, proxyTestAppBody, string(body)) | ||
require.Equal(t, http.StatusOK, resp.StatusCode) | ||
require.Equal(t, "1.1.1.1,127.0.0.1", resp.Header.Get("X-Forwarded-For")) | ||
assertWorkspaceLastUsedAtUpdated(t, appDetails) | ||
}) | ||
|
||
t.Run("ProxyError", func(t *testing.T) { | ||
|
@@ -361,6 +379,9 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) { | |
require.NoError(t, err) | ||
defer resp.Body.Close() | ||
require.Equal(t, http.StatusBadGateway, resp.StatusCode) | ||
// An valid authenticated attempt to access a workspace app | ||
// should count as usage regardless of success. | ||
assertWorkspaceLastUsedAtUpdated(t, appDetails) | ||
}) | ||
|
||
t.Run("NoProxyPort", func(t *testing.T) { | ||
|
@@ -375,6 +396,7 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) { | |
// TODO(@deansheather): This should be 400. There's a todo in the | ||
// resolve request code to fix this. | ||
require.Equal(t, http.StatusInternalServerError, resp.StatusCode) | ||
assertWorkspaceLastUsedAtUpdated(t, appDetails) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. review: I'm counting this as a successful and valid attempt to access a workspace. |
||
}) | ||
}) | ||
|
||
|
@@ -1430,16 +1452,12 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) { | |
t.Run("ReportStats", func(t *testing.T) { | ||
t.Parallel() | ||
|
||
flush := make(chan chan<- struct{}, 1) | ||
|
||
reporter := &fakeStatsReporter{} | ||
appDetails := setupProxyTest(t, &DeploymentOptions{ | ||
StatsCollectorOptions: workspaceapps.StatsCollectorOptions{ | ||
Reporter: reporter, | ||
ReportInterval: time.Hour, | ||
RollupWindow: time.Minute, | ||
|
||
Flush: flush, | ||
}, | ||
}) | ||
|
||
|
@@ -1457,10 +1475,7 @@ func Run(t *testing.T, appHostIsPrimary bool, factory DeploymentFactory) { | |
var stats []workspaceapps.StatsReport | ||
require.Eventually(t, func() bool { | ||
// Keep flushing until we get a non-empty stats report. | ||
flushDone := make(chan struct{}, 1) | ||
flush <- flushDone | ||
<-flushDone | ||
|
||
appDetails.FlushStats() | ||
stats = reporter.stats() | ||
return len(stats) > 0 | ||
}, testutil.WaitLong, testutil.IntervalFast, "stats not reported") | ||
|
@@ -1549,3 +1564,28 @@ func testReconnectingPTY(ctx context.Context, t *testing.T, client *codersdk.Cli | |
// Ensure the connection closes. | ||
require.ErrorIs(t, tr.ReadUntil(ctx, nil), io.EOF) | ||
} | ||
|
||
// Accessing an app should update the workspace's LastUsedAt. | ||
// NOTE: Despite our efforts with the flush channel, this is inherently racy. | ||
func assertWorkspaceLastUsedAtUpdated(t testing.TB, details *Details) { | ||
t.Helper() | ||
|
||
// Wait for stats to fully flush. | ||
require.Eventually(t, func() bool { | ||
details.FlushStats() | ||
ws, err := details.SDKClient.Workspace(context.Background(), details.Workspace.ID) | ||
assert.NoError(t, err) | ||
return ws.LastUsedAt.After(details.Workspace.LastUsedAt) | ||
}, testutil.WaitShort, testutil.IntervalMedium, "workspace LastUsedAt not updated when it should have been") | ||
} | ||
|
||
// Except when it sometimes shouldn't (e.g. no access) | ||
// NOTE: Despite our efforts with the flush channel, this is inherently racy. | ||
func assertWorkspaceLastUsedAtNotUpdated(t testing.TB, details *Details) { | ||
t.Helper() | ||
|
||
details.FlushStats() | ||
ws, err := details.SDKClient.Workspace(context.Background(), details.Workspace.ID) | ||
require.NoError(t, err) | ||
require.Equal(t, ws.LastUsedAt, details.Workspace.LastUsedAt, "workspace LastUsedAt updated when it should not have been") | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,7 @@ import ( | |
"github.com/coder/coder/v2/coderd/database" | ||
"github.com/coder/coder/v2/coderd/database/dbauthz" | ||
"github.com/coder/coder/v2/coderd/database/dbtime" | ||
"github.com/coder/coder/v2/coderd/util/slice" | ||
) | ||
|
||
const ( | ||
|
@@ -117,11 +118,25 @@ func (r *StatsDBReporter) Report(ctx context.Context, stats []StatsReport) error | |
batch.Requests = batch.Requests[:0] | ||
} | ||
} | ||
if len(batch.UserID) > 0 { | ||
err := tx.InsertWorkspaceAppStats(ctx, batch) | ||
if err != nil { | ||
return err | ||
} | ||
if len(batch.UserID) == 0 { | ||
return nil | ||
} | ||
|
||
if err := tx.InsertWorkspaceAppStats(ctx, batch); err != nil { | ||
return err | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. One could make a case for us trying to do the last used update even if this fails, wdyt? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Inserting app stats is just doing a big insert into a single table (which IIRC is unlogged), so if we run into issues doing that my gut tells me that any further database queries might not be successful. Might not hurt to try, but I'm not sure how much the extra complexity would be worth it. Bear in mind that we will end up just trying to do this again in another 30 seconds! |
||
} | ||
|
||
// TODO: We currently measure workspace usage based on when we get stats from it. | ||
// There are currently two paths for this: | ||
// 1) From SSH -> workspace agent stats POSTed from agent | ||
// 2) From workspace apps / rpty -> workspace app stats (from coderd / wsproxy) | ||
// Ideally we would have a single code path for this. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We could perhaps have a single endpoint, but I think it's unlikely we can have all the data originate from one place. At least not whilst retaining the behavior where trying to access a workspace results in usage. In the single endpoint scenario, we'd still have both agent and wsproxy pinging the endpoint based on usage. |
||
uniqueIDs := slice.Unique(batch.WorkspaceID) | ||
if err := tx.BatchUpdateWorkspaceLastUsedAt(ctx, database.BatchUpdateWorkspaceLastUsedAtParams{ | ||
IDs: uniqueIDs, | ||
LastUsedAt: dbtime.Now(), // This isn't 100% accurate, but it's good enough. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IMO, it's close enough. We could ensure we use max time from the stats if we want slightly more accuracy (still off for some workspaces, though). |
||
}); err != nil { | ||
return err | ||
} | ||
|
||
return nil | ||
|
@@ -234,6 +249,7 @@ func (sc *StatsCollector) Collect(report StatsReport) { | |
} | ||
delete(sc.statsBySessionID, report.SessionID) | ||
} | ||
sc.opts.Logger.Debug(sc.ctx, "collected workspace app stats", slog.F("report", report)) | ||
} | ||
|
||
// rollup performs stats rollup for sessions that fall within the | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This could be before the mutex lock, but it's such a minor perf change not sure it's worth changing.