Skip to content

feat: Add anonymized telemetry to report product usage #2273

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jun 17, 2022
Prev Previous commit
Next Next commit
Add reporting for workspaces
  • Loading branch information
kylecarbs committed Jun 16, 2022
commit 09fdadef87e7bce66fdc4522db8561e70e7de6cd
40 changes: 20 additions & 20 deletions cli/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,7 @@ func server() *cobra.Command {
SSHKeygenAlgorithm: sshKeygenAlgorithm,
TURNServer: turnServer,
TracerProvider: tracerProvider,
Telemetry: telemetry.NewNoop(),
}

if oauth2GithubClientSecret != "" {
Expand Down Expand Up @@ -310,28 +311,27 @@ func server() *cobra.Command {
if err != nil {
return xerrors.Errorf("parse telemetry url: %w", err)
}
// Disable telemetry if in dev-mode. If the telemetry flag
// is manually specified, override this behavior!
if !cmd.Flags().Changed("telemetry-enable") {
// Disable telemetry when the in-memory database is used, unless the telemetry flag was explicitly set!
if inMemoryDatabase && !cmd.Flags().Changed("telemetry") {
telemetryEnable = false
}
reporter, err := telemetry.New(telemetry.Options{
BuiltinPostgres: builtinPostgres,
DeploymentID: deploymentID,
Database: options.Database,
Logger: logger.Named("telemetry"),
URL: telemetryURL,
Disabled: !telemetryEnable,
GitHubOAuth: oauth2GithubClientID != "",
Prometheus: promEnabled,
STUN: len(stunServers) != 0,
Tunnel: tunnel,
})
if err != nil {
return xerrors.Errorf("create telemetry reporter: %w", err)
if telemetryEnable {
options.Telemetry, err = telemetry.New(telemetry.Options{
BuiltinPostgres: builtinPostgres,
DeploymentID: deploymentID,
Database: options.Database,
Logger: logger.Named("telemetry"),
URL: telemetryURL,
GitHubOAuth: oauth2GithubClientID != "",
Prometheus: promEnabled,
STUN: len(stunServers) != 0,
Tunnel: tunnel,
})
if err != nil {
return xerrors.Errorf("create telemetry reporter: %w", err)
}
defer options.Telemetry.Close()
}
defer reporter.Close()
options.Telemetry = reporter

coderAPI := coderd.New(options)
client := codersdk.New(localURL)
Expand Down Expand Up @@ -533,7 +533,7 @@ func server() *cobra.Command {
"Specifies organizations the user must be a member of to authenticate with GitHub.")
cliflag.BoolVarP(root.Flags(), &oauth2GithubAllowSignups, "oauth2-github-allow-signups", "", "CODER_OAUTH2_GITHUB_ALLOW_SIGNUPS", false,
"Specifies whether new users can sign up with GitHub.")
cliflag.BoolVarP(root.Flags(), &telemetryEnable, "telemetry-enable", "", "CODER_TELEMETRY_ENABLE", true, "Specifies whether telemetry is enabled or not. Coder collects anonymized usage data to help improve our product!")
cliflag.BoolVarP(root.Flags(), &telemetryEnable, "telemetry", "", "CODER_TELEMETRY", true, "Specifies whether telemetry is enabled or not. Coder collects anonymized usage data to help improve our product!")
cliflag.StringVarP(root.Flags(), &telemetryURL, "telemetry-url", "", "CODER_TELEMETRY_URL", "https://telemetry.coder.com", "Specifies a URL to send telemetry to.")
_ = root.Flags().MarkHidden("telemetry-url")
cliflag.BoolVarP(root.Flags(), &tlsEnable, "tls-enable", "", "CODER_TLS_ENABLE", false, "Specifies if TLS will be enabled")
Expand Down
4 changes: 1 addition & 3 deletions cli/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,10 +259,8 @@ func TestServer(t *testing.T) {
server := httptest.NewServer(r)
t.Cleanup(server.Close)

root, _ := clitest.New(t, "server", "--dev", "--tunnel=false", "--address", ":0", "--telemetry-enable", "true", "--telemetry-url", server.URL)
var buf strings.Builder
root, _ := clitest.New(t, "server", "--in-memory", "--address", ":0", "--telemetry", "--telemetry-url", server.URL)
errC := make(chan error)
root.SetOutput(&buf)
go func() {
errC <- root.ExecuteContext(ctx)
}()
Expand Down
2 changes: 1 addition & 1 deletion coderd/coderd.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ type Options struct {
ICEServers []webrtc.ICEServer
SecureAuthCookie bool
SSHKeygenAlgorithm gitsshkey.Algorithm
Telemetry *telemetry.Reporter
Telemetry telemetry.Reporter
TURNServer *turnconn.Server
TracerProvider *sdktrace.TracerProvider
}
Expand Down
2 changes: 2 additions & 0 deletions coderd/coderdtest/coderdtest.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"time"

"github.com/coder/coder/coderd/rbac"
"github.com/coder/coder/coderd/telemetry"
"github.com/coder/coder/coderd/util/ptr"

"cloud.google.com/go/compute/metadata"
Expand Down Expand Up @@ -164,6 +165,7 @@ func NewWithAPI(t *testing.T, options *Options) (*codersdk.Client, *coderd.API)
TURNServer: turnServer,
APIRateLimit: options.APIRateLimit,
Authorizer: options.Authorizer,
Telemetry: telemetry.NewNoop(),
})
srv.Config.Handler = coderAPI.Handler
if options.IncludeProvisionerD {
Expand Down
155 changes: 86 additions & 69 deletions coderd/telemetry/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,8 @@ type Options struct {
// URL is an endpoint to direct telemetry towards!
URL *url.URL

BuiltinPostgres bool
DeploymentID string
// Disabled determines whether telemetry will be collected
// and sent. This allows callers to still execute the API
// without having to check whether it's enabled.
Disabled bool
BuiltinPostgres bool
DeploymentID string
GitHubOAuth bool
Prometheus bool
STUN bool
Expand All @@ -53,12 +49,7 @@ type Options struct {
// New constructs a reporter for telemetry data.
// Duplicate data will be sent; the server side is responsible for indexing it by UUID.
// Data is anonymized prior to being sent!
func New(options Options) (*Reporter, error) {
if options.Disabled {
return &Reporter{
options: options,
}, nil
}
func New(options Options) (Reporter, error) {
if options.SnapshotFrequency == 0 {
// Report once every 30mins by default!
options.SnapshotFrequency = 30 * time.Minute
Expand All @@ -73,7 +64,7 @@ func New(options Options) (*Reporter, error) {
}

ctx, cancelFunc := context.WithCancel(context.Background())
reporter := &Reporter{
reporter := &remoteReporter{
ctx: ctx,
closed: make(chan struct{}),
closeFunc: cancelFunc,
Expand All @@ -82,14 +73,26 @@ func New(options Options) (*Reporter, error) {
snapshotURL: snapshotURL,
startedAt: database.Now(),
}
if !options.Disabled {
go reporter.runSnapshotter()
}
go reporter.runSnapshotter()
return reporter, nil
}

// NewNoop returns a Reporter backed by noopReporter. Nothing is sent
// anywhere: every snapshot handed to it is dropped.
func NewNoop() Reporter {
	return new(noopReporter)
}

// Reporter sends data to the telemetry server.
type Reporter struct {
type Reporter interface {
// Report sends a snapshot to the telemetry server.
// The contents of the snapshot can be a partial representation of the
// database. For example, if a new user is added, a snapshot can
// contain just that user entry.
Report(snapshot *Snapshot)
Close()
}

type remoteReporter struct {
ctx context.Context
closed chan struct{}
closeMutex sync.Mutex
Expand All @@ -102,14 +105,7 @@ type Reporter struct {
shutdownAt *time.Time
}

// Report sends a snapshot to the telemetry server.
// The contents of the snapshot can be a partial representation of the
// database. For example, if a new user is added, a snapshot can
// contain just that user entry.
func (r *Reporter) Report(snapshot *Snapshot) {
if r.options.Disabled {
return
}
func (r *remoteReporter) Report(snapshot *Snapshot) {
snapshot.DeploymentID = r.options.DeploymentID

// Runs in a goroutine so it's non-blocking to callers!
Expand Down Expand Up @@ -140,10 +136,7 @@ func (r *Reporter) Report(snapshot *Snapshot) {
}()
}

func (r *Reporter) Close() {
if r.options.Disabled {
return
}
func (r *remoteReporter) Close() {
r.closeMutex.Lock()
defer r.closeMutex.Unlock()
if r.isClosed() {
Expand All @@ -159,7 +152,7 @@ func (r *Reporter) Close() {
r.closeFunc()
}

func (r *Reporter) isClosed() bool {
func (r *remoteReporter) isClosed() bool {
select {
case <-r.closed:
return true
Expand All @@ -168,7 +161,7 @@ func (r *Reporter) isClosed() bool {
}
}

func (r *Reporter) runSnapshotter() {
func (r *remoteReporter) runSnapshotter() {
first := true
ticker := time.NewTicker(r.options.SnapshotFrequency)
defer ticker.Stop()
Expand All @@ -192,7 +185,7 @@ func (r *Reporter) runSnapshotter() {
}
}

func (r *Reporter) reportWithDeployment() {
func (r *remoteReporter) reportWithDeployment() {
// Submit deployment information before creating a snapshot!
// This is separated from the snapshot API call to reduce
// duplicate data from being inserted. Snapshot may be called
Expand All @@ -214,7 +207,7 @@ func (r *Reporter) reportWithDeployment() {
}

// deployment collects host information and reports it to the telemetry server.
func (r *Reporter) deployment() error {
func (r *remoteReporter) deployment() error {
sysInfoHost, err := sysinfo.Host()
if err != nil {
return xerrors.Errorf("get host info: %w", err)
Expand Down Expand Up @@ -269,7 +262,7 @@ func (r *Reporter) deployment() error {
}

// createSnapshot collects a full snapshot from the database.
func (r *Reporter) createSnapshot() (*Snapshot, error) {
func (r *remoteReporter) createSnapshot() (*Snapshot, error) {
var (
ctx = r.ctx
// For resources that grow in size very quickly (like workspace builds),
Expand Down Expand Up @@ -315,17 +308,7 @@ func (r *Reporter) createSnapshot() (*Snapshot, error) {
}
snapshot.Templates = make([]Template, 0, len(templates))
for _, dbTemplate := range templates {
snapshot.Templates = append(snapshot.Templates, Template{
ID: dbTemplate.ID,
CreatedBy: dbTemplate.CreatedBy,
CreatedAt: dbTemplate.CreatedAt,
UpdatedAt: dbTemplate.UpdatedAt,
OrganizationID: dbTemplate.OrganizationID,
Deleted: dbTemplate.Deleted,
ActiveVersionID: dbTemplate.ActiveVersionID,
Name: dbTemplate.Name,
Description: dbTemplate.Description != "",
})
snapshot.Templates = append(snapshot.Templates, ConvertTemplate(dbTemplate))
}
return nil
})
Expand All @@ -336,16 +319,7 @@ func (r *Reporter) createSnapshot() (*Snapshot, error) {
}
snapshot.TemplateVersions = make([]TemplateVersion, 0, len(templateVersions))
for _, version := range templateVersions {
snapVersion := TemplateVersion{
ID: version.ID,
CreatedAt: version.CreatedAt,
OrganizationID: version.OrganizationID,
JobID: version.JobID,
}
if version.TemplateID.Valid {
snapVersion.TemplateID = &version.TemplateID.UUID
}
snapshot.TemplateVersions = append(snapshot.TemplateVersions, snapVersion)
snapshot.TemplateVersions = append(snapshot.TemplateVersions, ConvertTemplateVersion(version))
}
return nil
})
Expand All @@ -356,6 +330,9 @@ func (r *Reporter) createSnapshot() (*Snapshot, error) {
}
var firstUser database.User
for _, dbUser := range users {
if dbUser.Status != database.UserStatusActive {
continue
}
if firstUser.CreatedAt.IsZero() {
firstUser = dbUser
}
Expand All @@ -365,20 +342,8 @@ func (r *Reporter) createSnapshot() (*Snapshot, error) {
}
snapshot.Users = make([]User, 0, len(users))
for _, dbUser := range users {
emailHashed := ""
atSymbol := strings.LastIndex(dbUser.Email, "@")
if atSymbol >= 0 {
// We hash the beginning of the user to allow for indexing users
// by email between deployments.
hash := sha256.Sum256([]byte(dbUser.Email[:atSymbol]))
emailHashed = fmt.Sprintf("%x%s", hash[:], dbUser.Email[atSymbol:])
}
user := User{
ID: dbUser.ID,
EmailHashed: emailHashed,
RBACRoles: dbUser.RBACRoles,
CreatedAt: dbUser.CreatedAt,
}
user := ConvertUser(dbUser)
// If it's the first user, we'll send the email!
if firstUser.ID == dbUser.ID {
user.Email = dbUser.Email
}
Expand Down Expand Up @@ -533,6 +498,53 @@ func ConvertWorkspaceResource(resource database.WorkspaceResource) WorkspaceReso
}
}

// ConvertUser builds the telemetry representation of a database user.
//
// The part of the email before the last "@" is replaced by its hex-encoded
// SHA-256 digest while the "@domain" suffix is kept as-is, so the same
// address produces the same EmailHashed value in every deployment. An email
// without an "@" leaves EmailHashed empty.
func ConvertUser(dbUser database.User) User {
	converted := User{
		ID:        dbUser.ID,
		RBACRoles: dbUser.RBACRoles,
		CreatedAt: dbUser.CreatedAt,
	}

	at := strings.LastIndex(dbUser.Email, "@")
	if at < 0 {
		// Nothing to split on; EmailHashed stays at its zero value.
		return converted
	}

	// Only the local part is hashed; the domain is appended unchanged.
	digest := sha256.Sum256([]byte(dbUser.Email[:at]))
	converted.EmailHashed = fmt.Sprintf("%x%s", digest[:], dbUser.Email[at:])
	return converted
}

// ConvertTemplate builds the telemetry representation of a database template.
//
// Identifiers, timestamps, the deleted flag and the name are copied over
// directly. The description text itself is not copied: only whether one was
// set is recorded.
func ConvertTemplate(dbTemplate database.Template) Template {
	var converted Template
	converted.ID = dbTemplate.ID
	converted.CreatedBy = dbTemplate.CreatedBy
	converted.CreatedAt = dbTemplate.CreatedAt
	converted.UpdatedAt = dbTemplate.UpdatedAt
	converted.OrganizationID = dbTemplate.OrganizationID
	converted.Deleted = dbTemplate.Deleted
	converted.ActiveVersionID = dbTemplate.ActiveVersionID
	converted.Name = dbTemplate.Name
	// Report presence of a description, never its contents.
	converted.Description = dbTemplate.Description != ""
	return converted
}

// ConvertTemplateVersion builds the telemetry representation of a database
// template version.
//
// TemplateID is set only when the database value is marked valid; otherwise
// it is left nil.
func ConvertTemplateVersion(version database.TemplateVersion) TemplateVersion {
	converted := TemplateVersion{
		ID:             version.ID,
		CreatedAt:      version.CreatedAt,
		OrganizationID: version.OrganizationID,
		JobID:          version.JobID,
	}
	if !version.TemplateID.Valid {
		return converted
	}
	// version is this function's own copy of the argument, so the pointer
	// refers to that copy rather than to the caller's value.
	converted.TemplateID = &version.TemplateID.UUID
	return converted
}

// Snapshot represents a point-in-time anonymized database dump.
// Data is aggregated by latest on the server-side, so partial data
// can be sent without issue.
Expand Down Expand Up @@ -668,3 +680,8 @@ type ParameterSchema struct {
Name string `json:"name"`
ValidationCondition string `json:"validation_condition"`
}

// noopReporter is the do-nothing reporter handed out by NewNoop. It holds no
// state and both of its methods have empty bodies.
type noopReporter struct{}

// Report ignores the snapshot it is given.
func (*noopReporter) Report(*Snapshot) {}

// Close does nothing.
func (*noopReporter) Close() {}
Loading