Skip to content

fix: only collect prometheus database metrics when explicitly enabled #8045

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 15, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions cli/server.go
Original file line number Diff line number Diff line change
@@ -589,7 +589,7 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.

if cfg.InMemoryDatabase {
// This is only used for testing.
options.Database = dbmetrics.New(dbfake.New(), options.PrometheusRegistry)
options.Database = dbfake.New()
options.Pubsub = pubsub.NewInMemory()
} else {
sqlDB, err := connectToPostgres(ctx, logger, sqlDriver, cfg.PostgresURL.String())
@@ -600,14 +600,18 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
_ = sqlDB.Close()
}()

options.Database = dbmetrics.New(database.New(sqlDB), options.PrometheusRegistry)
options.Database = database.New(sqlDB)
options.Pubsub, err = pubsub.New(ctx, sqlDB, cfg.PostgresURL.String())
if err != nil {
return xerrors.Errorf("create pubsub: %w", err)
}
defer options.Pubsub.Close()
}

if options.DeploymentValues.Prometheus.Enable && options.DeploymentValues.Prometheus.CollectDBMetrics {
options.Database = dbmetrics.New(options.Database, options.PrometheusRegistry)
}

var deploymentID string
err = options.Database.InTx(func(tx database.Store) error {
// This will block until the lock is acquired, and will be
148 changes: 102 additions & 46 deletions cli/server_test.go
Original file line number Diff line number Diff line change
@@ -881,59 +881,115 @@ func TestServer(t *testing.T) {
})
t.Run("Prometheus", func(t *testing.T) {
t.Parallel()
random, err := net.Listen("tcp", "127.0.0.1:0")
require.NoError(t, err)
_ = random.Close()
tcpAddr, valid := random.Addr().(*net.TCPAddr)
require.True(t, valid)
randomPort := tcpAddr.Port

inv, cfg := clitest.New(t,
"server",
"--in-memory",
"--http-address", ":0",
"--access-url", "http://example.com",
"--provisioner-daemons", "1",
"--prometheus-enable",
"--prometheus-address", ":"+strconv.Itoa(randomPort),
"--cache-dir", t.TempDir(),
)
randomPort := func(t *testing.T) int {
random, err := net.Listen("tcp", "127.0.0.1:0")
require.NoError(t, err)
_ = random.Close()
tcpAddr, valid := random.Addr().(*net.TCPAddr)
require.True(t, valid)
return tcpAddr.Port
}

ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
defer cancel()
t.Run("DBMetricsDisabled", func(t *testing.T) {
t.Parallel()

clitest.Start(t, inv)
_ = waitAccessURL(t, cfg)
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
defer cancel()

var res *http.Response
require.Eventually(t, func() bool {
req, err := http.NewRequestWithContext(ctx, "GET", fmt.Sprintf("http://127.0.0.1:%d", randomPort), nil)
assert.NoError(t, err)
// nolint:bodyclose
res, err = http.DefaultClient.Do(req)
return err == nil
}, testutil.WaitShort, testutil.IntervalFast)
defer res.Body.Close()
randPort := randomPort(t)
inv, cfg := clitest.New(t,
"server",
"--in-memory",
"--http-address", ":0",
"--access-url", "http://example.com",
"--provisioner-daemons", "1",
"--prometheus-enable",
"--prometheus-address", ":"+strconv.Itoa(randPort),
// "--prometheus-collect-db-metrics", // disabled by default
"--cache-dir", t.TempDir(),
)

scanner := bufio.NewScanner(res.Body)
hasActiveUsers := false
hasWorkspaces := false
for scanner.Scan() {
// This metric is manually registered to be tracked in the server. That's
// why we test it's tracked here.
if strings.HasPrefix(scanner.Text(), "coderd_api_active_users_duration_hour") {
hasActiveUsers = true
continue
clitest.Start(t, inv)
_ = waitAccessURL(t, cfg)

var res *http.Response
require.Eventually(t, func() bool {
req, err := http.NewRequestWithContext(ctx, "GET", fmt.Sprintf("http://127.0.0.1:%d", randPort), nil)
assert.NoError(t, err)
// nolint:bodyclose
res, err = http.DefaultClient.Do(req)
return err == nil
}, testutil.WaitShort, testutil.IntervalFast)
defer res.Body.Close()

scanner := bufio.NewScanner(res.Body)
hasActiveUsers := false
hasWorkspaces := false
for scanner.Scan() {
// This metric is manually registered to be tracked in the server. That's
// why we test it's tracked here.
if strings.HasPrefix(scanner.Text(), "coderd_api_active_users_duration_hour") {
hasActiveUsers = true
continue
}
if strings.HasPrefix(scanner.Text(), "coderd_api_workspace_latest_build_total") {
hasWorkspaces = true
continue
}
if strings.HasPrefix(scanner.Text(), "coderd_db_query_latencies_seconds") {
t.Fatal("db metrics should not be tracked when --prometheus-collect-db-metrics is not enabled")
}
t.Logf("scanned %s", scanner.Text())
}
if strings.HasPrefix(scanner.Text(), "coderd_api_workspace_latest_build_total") {
hasWorkspaces = true
continue
require.NoError(t, scanner.Err())
require.True(t, hasActiveUsers)
require.True(t, hasWorkspaces)
})

t.Run("DBMetricsEnabled", func(t *testing.T) {
t.Parallel()

ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
defer cancel()

randPort := randomPort(t)
inv, cfg := clitest.New(t,
"server",
"--in-memory",
"--http-address", ":0",
"--access-url", "http://example.com",
"--provisioner-daemons", "1",
"--prometheus-enable",
"--prometheus-address", ":"+strconv.Itoa(randPort),
"--prometheus-collect-db-metrics",
"--cache-dir", t.TempDir(),
)

clitest.Start(t, inv)
_ = waitAccessURL(t, cfg)

var res *http.Response
require.Eventually(t, func() bool {
req, err := http.NewRequestWithContext(ctx, "GET", fmt.Sprintf("http://127.0.0.1:%d", randPort), nil)
assert.NoError(t, err)
// nolint:bodyclose
res, err = http.DefaultClient.Do(req)
return err == nil
}, testutil.WaitShort, testutil.IntervalFast)
defer res.Body.Close()

scanner := bufio.NewScanner(res.Body)
hasDBMetrics := false
for scanner.Scan() {
if strings.HasPrefix(scanner.Text(), "coderd_db_query_latencies_seconds") {
hasDBMetrics = true
}
t.Logf("scanned %s", scanner.Text())
}
t.Logf("scanned %s", scanner.Text())
}
require.NoError(t, scanner.Err())
require.True(t, hasActiveUsers)
require.True(t, hasWorkspaces)
require.NoError(t, scanner.Err())
require.True(t, hasDBMetrics)
})
})
t.Run("GitHubOAuth", func(t *testing.T) {
t.Parallel()
3 changes: 3 additions & 0 deletions cli/testdata/coder_server_--help.golden
Original file line number Diff line number Diff line change
@@ -93,6 +93,9 @@ Use a YAML configuration file when your server launch become unwieldy.
--prometheus-collect-agent-stats bool, $CODER_PROMETHEUS_COLLECT_AGENT_STATS
Collect agent stats (may increase charges for metrics storage).

--prometheus-collect-db-metrics bool, $CODER_PROMETHEUS_COLLECT_DB_METRICS (default: false)
Collect database metrics (may increase charges for metrics storage).

--prometheus-enable bool, $CODER_PROMETHEUS_ENABLE
Serve prometheus metrics on the address defined by prometheus address.

3 changes: 3 additions & 0 deletions cli/testdata/server-config.yaml.golden
Original file line number Diff line number Diff line change
@@ -152,6 +152,9 @@ introspection:
# Collect agent stats (may increase charges for metrics storage).
# (default: <unset>, type: bool)
collect_agent_stats: false
# Collect database metrics (may increase charges for metrics storage).
# (default: false, type: bool)
collect_db_metrics: false
pprof:
# Serve pprof metrics on the address defined by pprof address.
# (default: <unset>, type: bool)
3 changes: 3 additions & 0 deletions coderd/apidoc/docs.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions coderd/apidoc/swagger.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 0 additions & 5 deletions coderd/coderd.go
Original file line number Diff line number Diff line change
@@ -47,7 +47,6 @@ import (
"github.com/coder/coder/coderd/awsidentity"
"github.com/coder/coder/coderd/database"
"github.com/coder/coder/coderd/database/dbauthz"
"github.com/coder/coder/coderd/database/dbmetrics"
"github.com/coder/coder/coderd/database/pubsub"
"github.com/coder/coder/coderd/gitauth"
"github.com/coder/coder/coderd/gitsshkey"
@@ -191,10 +190,6 @@ func New(options *Options) *API {
if options.Authorizer == nil {
options.Authorizer = rbac.NewCachingAuthorizer(options.PrometheusRegistry)
}
// The below are no-ops if already wrapped.
if options.PrometheusRegistry != nil {
options.Database = dbmetrics.New(options.Database, options.PrometheusRegistry)
}
options.Database = dbauthz.New(
options.Database,
options.Authorizer,
11 changes: 11 additions & 0 deletions codersdk/deployment.go
Original file line number Diff line number Diff line change
@@ -229,6 +229,7 @@ type PrometheusConfig struct {
Enable clibase.Bool `json:"enable" typescript:",notnull"`
Address clibase.HostPort `json:"address" typescript:",notnull"`
CollectAgentStats clibase.Bool `json:"collect_agent_stats" typescript:",notnull"`
CollectDBMetrics clibase.Bool `json:"collect_db_metrics" typescript:",notnull"`
}

type PprofConfig struct {
@@ -760,6 +761,16 @@ when required by your organization's security policy.`,
Group: &deploymentGroupIntrospectionPrometheus,
YAML: "collect_agent_stats",
},
{
Name: "Prometheus Collect Database Metrics",
Description: "Collect database metrics (may increase charges for metrics storage).",
Flag: "prometheus-collect-db-metrics",
Env: "CODER_PROMETHEUS_COLLECT_DB_METRICS",
Value: &c.Prometheus.CollectDBMetrics,
Group: &deploymentGroupIntrospectionPrometheus,
YAML: "collect_db_metrics",
Default: "false",
},
// Pprof settings
{
Name: "pprof Enable",
1 change: 1 addition & 0 deletions docs/api/general.md
Original file line number Diff line number Diff line change
@@ -275,6 +275,7 @@ curl -X GET http://coder-server:8080/api/v2/deployment/config \
"port": "string"
},
"collect_agent_stats": true,
"collect_db_metrics": true,
"enable": true
},
"provisioner": {
4 changes: 4 additions & 0 deletions docs/api/schemas.md
Original file line number Diff line number Diff line change
@@ -1943,6 +1943,7 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in
"port": "string"
},
"collect_agent_stats": true,
"collect_db_metrics": true,
"enable": true
},
"provisioner": {
@@ -2270,6 +2271,7 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in
"port": "string"
},
"collect_agent_stats": true,
"collect_db_metrics": true,
"enable": true
},
"provisioner": {
@@ -3092,6 +3094,7 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in
"port": "string"
},
"collect_agent_stats": true,
"collect_db_metrics": true,
"enable": true
}
```
@@ -3102,6 +3105,7 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in
| --------------------- | ------------------------------------ | -------- | ------------ | ----------- |
| `address` | [clibase.HostPort](#clibasehostport) | false | | |
| `collect_agent_stats` | boolean | false | | |
| `collect_db_metrics` | boolean | false | | |
| `enable` | boolean | false | | |

## codersdk.ProvisionerConfig
11 changes: 11 additions & 0 deletions docs/cli/server.md
Original file line number Diff line number Diff line change
@@ -565,6 +565,17 @@ The bind address to serve prometheus metrics.

Collect agent stats (may increase charges for metrics storage).

### --prometheus-collect-db-metrics

| | |
| ----------- | -------------------------------------------------------- |
| Type | <code>bool</code> |
| Environment | <code>$CODER_PROMETHEUS_COLLECT_DB_METRICS</code> |
| YAML | <code>introspection.prometheus.collect_db_metrics</code> |
| Default | <code>false</code> |

Collect database metrics (may increase charges for metrics storage).

### --prometheus-enable

| | |
3 changes: 3 additions & 0 deletions enterprise/cli/testdata/coder_server_--help.golden
Original file line number Diff line number Diff line change
@@ -93,6 +93,9 @@ Use a YAML configuration file when your server launch become unwieldy.
--prometheus-collect-agent-stats bool, $CODER_PROMETHEUS_COLLECT_AGENT_STATS
Collect agent stats (may increase charges for metrics storage).

--prometheus-collect-db-metrics bool, $CODER_PROMETHEUS_COLLECT_DB_METRICS (default: false)
Collect database metrics (may increase charges for metrics storage).

--prometheus-enable bool, $CODER_PROMETHEUS_ENABLE
Serve prometheus metrics on the address defined by prometheus address.

2 changes: 2 additions & 0 deletions scaletest/terraform/coder.tf
Original file line number Diff line number Diff line change
@@ -102,6 +102,8 @@ coder:
value: "true"
- name: "CODER_PROMETHEUS_COLLECT_AGENT_STATS"
value: "true"
- name: "CODER_PROMETHEUS_COLLECT_DB_METRICS"
value: "true"
- name: "CODER_VERBOSE"
value: "true"
image:
1 change: 1 addition & 0 deletions site/src/api/typesGenerated.ts
Original file line number Diff line number Diff line change
@@ -606,6 +606,7 @@ export interface PrometheusConfig {
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- External type
readonly address: any
readonly collect_agent_stats: boolean
readonly collect_db_metrics: boolean
}

// From codersdk/deployment.go