Skip to content

Daily Active User Metrics #3735

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 28 commits into from
Sep 1, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
502aa52
agent: add ConnStats
ammario Aug 29, 2022
3893045
agent: add StatsReporter
ammario Aug 29, 2022
c03ad90
Frontend tests pass
ammario Sep 1, 2022
1bd9cec
Split DAUChart into its own file
ammario Sep 1, 2022
e46329b
Get FE tests passing with real data!
ammario Sep 1, 2022
d472b13
Test frontend
ammario Sep 1, 2022
e0295e0
Fix compilation error
ammario Sep 1, 2022
2f1a423
Rename ConnStats to StatsConn
ammario Sep 1, 2022
0a50cc9
continues instead of returns
ammario Sep 1, 2022
7feab5e
Fix some test failures
ammario Sep 1, 2022
a4d2cf7
Redo tests
ammario Sep 1, 2022
52c9d10
Address review comments
ammario Sep 1, 2022
3f9901e
REVAMP — backend tests work
ammario Sep 1, 2022
7840509
Black triangle
ammario Sep 1, 2022
39170cf
Consolidate template state machines
ammario Sep 1, 2022
eb373d6
Move workspaceagent endpoint
ammario Sep 1, 2022
a3d87b8
Address most review comments
ammario Sep 1, 2022
31ba0c6
Improve contrast in chart
ammario Sep 1, 2022
5b906c1
Add more agent tests
ammario Sep 1, 2022
49d9386
Fix JS ci
ammario Sep 1, 2022
b14a077
A bunch of minor touch ups
ammario Sep 1, 2022
8da24c4
Stabilize protoc
ammario Sep 1, 2022
00ec953
Merge remote-tracking branch 'origin/main' into metrics
ammario Sep 1, 2022
4940319
Update lockfile
ammario Sep 1, 2022
22b2028
Address comments + attempt to fix protoc
ammario Sep 1, 2022
0157365
fixup! Address comments + attempt to fix protoc
ammario Sep 1, 2022
b166cdd
Try to fix protoc...
ammario Sep 1, 2022
4201998
PROTO!
ammario Sep 1, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
A bunch of minor touch ups
  • Loading branch information
ammario committed Sep 1, 2022
commit b14a077e4787fa6432099a07ff4f7533c9cf388c
7 changes: 3 additions & 4 deletions agent/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,11 @@ var _ net.Conn = new(StatsConn)

// Stats records the Agent's network connection statistics for use in
// user-facing metrics and debugging.
// Each member value must be written and read with atomic.
type Stats struct {
NumConns int64 `json:"num_comms"`
// RxBytes must be read with atomic.
RxBytes int64 `json:"rx_bytes"`
// TxBytes must be read with atomic.
TxBytes int64 `json:"tx_bytes"`
RxBytes int64 `json:"rx_bytes"`
TxBytes int64 `json:"tx_bytes"`
}

func (s *Stats) Copy() *Stats {
Expand Down
3 changes: 3 additions & 0 deletions cli/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -841,6 +841,9 @@ func Server(newAPI func(*coderd.Options) *coderd.API) *cobra.Command {
_ = root.Flags().MarkHidden("spooky")
cliflag.BoolVarP(root.Flags(), &verbose, "verbose", "v", "CODER_VERBOSE", false, "Enables verbose logging.")

// These metrics flags are for manually testing the metric system.
// The defaults should be acceptable for any Coder deployment of any
// reasonable size.
cliflag.DurationVarP(root.Flags(), &metricsCacheRefreshInterval, "metrics-cache-refresh-interval", "", "CODER_METRICS_CACHE_REFRESH_INTERVAL", time.Hour, "How frequently metrics are refreshed")
_ = root.Flags().MarkHidden("metrics-cache-refresh-interval")
cliflag.DurationVarP(root.Flags(), &agentStatRefreshInterval, "agent-stats-refresh-interval", "", "CODER_AGENT_STATS_REFRESH_INTERVAL", time.Minute*10, "How frequently agent stats are recorded")
Expand Down
1 change: 0 additions & 1 deletion coderd/coderd.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ type Options struct {
TailnetCoordinator *tailnet.Coordinator
DERPMap *tailcfg.DERPMap

// Metrics related intervals.
MetricsCacheRefreshInterval time.Duration
AgentStatsRefreshInterval time.Duration
}
Expand Down
14 changes: 8 additions & 6 deletions coderd/metricscache/metricscache.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@ import (
"github.com/coder/retry"
)

// Cache holds the DAU cache.
// The aggregation queries responsible for these values
// can take up to a minute on large deployments, but the cache has near zero
// effect on most deployments.
// Cache holds the template DAU cache.
// The aggregation queries responsible for these values can take up to a minute
// on large deployments. Even in small deployments, aggregation queries can
// take a few hundred milliseconds, which would ruin page load times and
// database performance if in the hot path.
type Cache struct {
database database.Store
log slog.Logger
Expand Down Expand Up @@ -153,8 +154,9 @@ func (c *Cache) Close() error {
return nil
}

// DAUs returns the DAUs or nil if they aren't ready yet.
func (c *Cache) DAUs(id uuid.UUID) codersdk.TemplateDAUsResponse {
// TemplateDAUs returns an empty response if the template doesn't have users
// or is loading for the first time.
func (c *Cache) TemplateDAUs(id uuid.UUID) codersdk.TemplateDAUsResponse {
m := c.templateDAUResponses.Load()
if m == nil {
// Data loading.
Expand Down
2 changes: 1 addition & 1 deletion coderd/metricscache/metricscache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ func TestCache(t *testing.T) {
var got codersdk.TemplateDAUsResponse

require.Eventuallyf(t, func() bool {
got = cache.DAUs(templateID)
got = cache.TemplateDAUs(templateID)
return reflect.DeepEqual(got.Entries, tt.want)
}, testutil.WaitShort, testutil.IntervalFast,
"GetDAUs() = %v, want %v", got, tt.want,
Expand Down
4 changes: 0 additions & 4 deletions coderd/rbac/object.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,6 @@ var (
ResourceLicense = Object{
Type: "license",
}

ResourceMetrics = Object{
Type: "metrics",
}
)

// Object is used to create objects for authz checks when you have none in
Expand Down
2 changes: 1 addition & 1 deletion coderd/templates.go
Original file line number Diff line number Diff line change
Expand Up @@ -524,7 +524,7 @@ func (api *API) templateDAUs(rw http.ResponseWriter, r *http.Request) {
return
}

resp := api.metricsCache.DAUs(template.ID)
resp := api.metricsCache.TemplateDAUs(template.ID)
if resp.Entries == nil {
resp.Entries = []codersdk.DAUEntry{}
}
Expand Down
3 changes: 3 additions & 0 deletions coderd/workspaceagents.go
Original file line number Diff line number Diff line change
Expand Up @@ -821,6 +821,9 @@ func (api *API) workspaceAgentReportStats(rw http.ResponseWriter, r *http.Reques
}

// Avoid inserting duplicate rows to preserve DB space.
// We will see duplicate reports on idle connections
// (e.g. web terminal left open) or when there are no connections at
// all.
var insert = !reflect.DeepEqual(lastReport, rep)

api.Logger.Debug(ctx, "read stats report",
Expand Down
4 changes: 2 additions & 2 deletions codersdk/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,8 @@ func (e *Error) Error() string {
return builder.String()
}

type CloseFunc func() error
type closeFunc func() error

func (c CloseFunc) Close() error {
func (c closeFunc) Close() error {
return c()
}
146 changes: 0 additions & 146 deletions codersdk/metrics.go

This file was deleted.

41 changes: 41 additions & 0 deletions codersdk/templates.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,44 @@ func (c *Client) TemplateVersionByName(ctx context.Context, template uuid.UUID,
var templateVersion TemplateVersion
return templateVersion, json.NewDecoder(res.Body).Decode(&templateVersion)
}

type (
	// DAUEntry pairs a date with the daily-active-user count recorded for it.
	DAUEntry struct {
		Date time.Time `json:"date"`
		DAUs int       `json:"daus"`
	}

	// TemplateDAUsResponse is the JSON body decoded by Client.TemplateDAUs:
	// a list of per-date DAU entries for one template.
	TemplateDAUsResponse struct {
		Entries []DAUEntry `json:"entries"`
	}
)

// TemplateDAUs requests GET /api/v2/templates/{templateID}/daus and decodes
// the JSON body into a TemplateDAUsResponse. A transport failure is wrapped
// as "execute request"; any status other than 200 OK is turned into an error
// by readBodyAsError.
func (c *Client) TemplateDAUs(ctx context.Context, templateID uuid.UUID) (*TemplateDAUsResponse, error) {
	endpoint := fmt.Sprintf("/api/v2/templates/%s/daus", templateID)

	res, err := c.Request(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return nil, xerrors.Errorf("execute request: %w", err)
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return nil, readBodyAsError(res)
	}

	// The decoded value and the decode error are returned together, exactly
	// as the single-expression return did.
	daus := new(TemplateDAUsResponse)
	decodeErr := json.NewDecoder(res.Body).Decode(daus)
	return daus, decodeErr
}

// @typescript-ignore AgentStatsReportRequest

// AgentStatsReportRequest is the WebSocket message coderd sends to the agent
// to ask for a stats report. It currently carries no fields.
type AgentStatsReportRequest struct{}

// AgentStatsReportResponse is returned for each report
// request by the agent.
type AgentStatsReportResponse struct {
// NumConns is copied from agent.Stats.NumConns when a report is built.
// NOTE(review): the JSON key "num_comms" looks like a typo for "num_conns";
// agent.Stats uses the same key, so confirm with every consumer before
// changing the wire format.
NumConns int64 `json:"num_comms"`
// RxBytes is the number of received bytes.
RxBytes int64 `json:"rx_bytes"`
// TxBytes is the number of transmitted bytes.
TxBytes int64 `json:"tx_bytes"`
}
85 changes: 85 additions & 0 deletions codersdk/workspaceagents.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"golang.org/x/net/proxy"
"golang.org/x/xerrors"
"nhooyr.io/websocket"
"nhooyr.io/websocket/wsjson"
"tailscale.com/tailcfg"

"cdr.dev/slog"
Expand Down Expand Up @@ -528,3 +529,87 @@ func (c *Client) turnProxyDialer(ctx context.Context, httpClient *http.Client, p
return websocket.NetConn(ctx, conn, websocket.MessageBinary), nil
})
}

// AgentReportStats begins a stat streaming connection with the Coder server.
// It is resilient to network failures and intermittent coderd issues.
//
// A background goroutine dials the report-stats WebSocket endpoint and, for
// every AgentStatsReportRequest read from the server, calls stats and writes
// the result back as an AgentStatsReportResponse. When a connection attempt
// or an established connection fails, the error is logged (unless ctx is
// already done) and the retry loop dials again.
//
// The returned io.Closer cancels the goroutine and blocks until it has
// exited. An error is returned only when the endpoint URL or the cookie jar
// cannot be constructed.
func (c *Client) AgentReportStats(
	ctx context.Context,
	log slog.Logger,
	stats func() *agent.Stats,
) (io.Closer, error) {
	serverURL, err := c.URL.Parse("/api/v2/workspaceagents/me/report-stats")
	if err != nil {
		return nil, xerrors.Errorf("parse url: %w", err)
	}

	jar, err := cookiejar.New(nil)
	if err != nil {
		return nil, xerrors.Errorf("create cookie jar: %w", err)
	}

	// Authenticate the WebSocket handshake with the client's session token.
	jar.SetCookies(serverURL, []*http.Cookie{{
		Name:  SessionTokenKey,
		Value: c.SessionToken,
	}})

	httpClient := &http.Client{
		Jar: jar,
	}

	doneCh := make(chan struct{})
	ctx, cancel := context.WithCancel(ctx)

	go func() {
		defer close(doneCh)

		// If the agent connection succeeds for a while, then fails, then succeeds
		// for a while (etc.) the retry may hit the maximum. This is a normal
		// case for long-running agents that experience coderd upgrades, so
		// we use a short maximum retry limit.
		for r := retry.New(time.Second, time.Minute); r.Wait(ctx); {
			// Keep the per-attempt error local to the goroutine instead of
			// writing to the enclosing function's err variable.
			err := func() error {
				conn, res, err := websocket.Dial(ctx, serverURL.String(), &websocket.DialOptions{
					HTTPClient: httpClient,
					// Need to disable compression to avoid a data-race.
					CompressionMode: websocket.CompressionDisabled,
				})
				if err != nil {
					if res == nil {
						return err
					}
					return readBodyAsError(res)
				}
				// Always release the connection when this attempt ends so a
				// failed read or write cannot leave it open across retries.
				defer conn.Close(websocket.StatusGoingAway, "")

				for {
					// Block until coderd asks for the next report.
					var req AgentStatsReportRequest
					err := wsjson.Read(ctx, conn, &req)
					if err != nil {
						return err
					}

					s := stats()

					resp := AgentStatsReportResponse{
						NumConns: s.NumConns,
						RxBytes:  s.RxBytes,
						TxBytes:  s.TxBytes,
					}

					err = wsjson.Write(ctx, conn, resp)
					if err != nil {
						return err
					}
				}
			}()
			// Errors caused by our own cancellation are expected; stay quiet.
			if err != nil && ctx.Err() == nil {
				log.Error(ctx, "report stats", slog.Error(err))
			}
		}
	}()

	return closeFunc(func() error {
		cancel()
		<-doneCh
		return nil
	}), nil
}