Skip to content

feat: Add active users prometheus metric #3406

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"coderdtest",
"codersdk",
"cronstrue",
"databasefake",
"devel",
"drpc",
"drpcconn",
Expand Down Expand Up @@ -52,6 +53,7 @@
"oneof",
"parameterscopeid",
"pqtype",
"prometheusmetrics",
"promptui",
"protobuf",
"provisionerd",
Expand All @@ -72,6 +74,7 @@
"templateversions",
"testdata",
"testid",
"testutil",
"tfexec",
"tfjson",
"tfplan",
Expand Down
33 changes: 22 additions & 11 deletions cli/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"github.com/google/uuid"
"github.com/pion/turn/v2"
"github.com/pion/webrtc/v3"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/spf13/afero"
"github.com/spf13/cobra"
Expand All @@ -53,6 +54,7 @@ import (
"github.com/coder/coder/coderd/database/databasefake"
"github.com/coder/coder/coderd/devtunnel"
"github.com/coder/coder/coderd/gitsshkey"
"github.com/coder/coder/coderd/prometheusmetrics"
"github.com/coder/coder/coderd/telemetry"
"github.com/coder/coder/coderd/tracing"
"github.com/coder/coder/coderd/turnconn"
Expand Down Expand Up @@ -392,6 +394,26 @@ func server() *cobra.Command {
defer options.Telemetry.Close()
}

// This prevents the pprof import from being accidentally deleted.
_ = pprof.Handler
if pprofEnabled {
//nolint:revive
defer serveHandler(ctx, logger, nil, pprofAddress, "pprof")()
}
if promEnabled {
options.PrometheusRegistry = prometheus.NewRegistry()
closeFunc, err := prometheusmetrics.ActiveUsers(ctx, options.PrometheusRegistry, options.Database, 0)
if err != nil {
return xerrors.Errorf("register active users prometheus metric: %w", err)
}
defer closeFunc()

//nolint:revive
defer serveHandler(ctx, logger, promhttp.InstrumentMetricHandler(
options.PrometheusRegistry, promhttp.HandlerFor(options.PrometheusRegistry, promhttp.HandlerOpts{}),
), promAddress, "prometheus")()
}

coderAPI := coderd.New(options)
defer coderAPI.Close()

Expand All @@ -406,17 +428,6 @@ func server() *cobra.Command {
}
}

// This prevents the pprof import from being accidentally deleted.
_ = pprof.Handler
if pprofEnabled {
//nolint:revive
defer serveHandler(ctx, logger, nil, pprofAddress, "pprof")()
}
if promEnabled {
//nolint:revive
defer serveHandler(ctx, logger, promhttp.Handler(), promAddress, "prometheus")()
}

// Since errCh only has one buffered slot, all routines
// sending on it must be wrapped in a select/default to
// avoid leaving dangling goroutines waiting for the
Expand Down
55 changes: 55 additions & 0 deletions cli/server_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package cli_test

import (
"bufio"
"context"
"crypto/ecdsa"
"crypto/elliptic"
Expand All @@ -10,17 +11,21 @@ import (
"crypto/x509/pkix"
"encoding/json"
"encoding/pem"
"fmt"
"math/big"
"net"
"net/http"
"net/http/httptest"
"net/url"
"os"
"runtime"
"strconv"
"strings"
"testing"
"time"

"github.com/go-chi/chi"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/goleak"

Expand Down Expand Up @@ -374,6 +379,56 @@ func TestServer(t *testing.T) {
cancelFunc()
<-errC
})
t.Run("Prometheus", func(t *testing.T) {
t.Parallel()
ctx, cancelFunc := context.WithCancel(context.Background())
defer cancelFunc()

random, err := net.Listen("tcp", "127.0.0.1:0")
require.NoError(t, err)
_ = random.Close()
tcpAddr, valid := random.Addr().(*net.TCPAddr)
require.True(t, valid)
randomPort := tcpAddr.Port

root, cfg := clitest.New(t,
"server",
"--in-memory",
"--address", ":0",
"--provisioner-daemons", "1",
"--prometheus-enable",
"--prometheus-address", ":"+strconv.Itoa(randomPort),
"--cache-dir", t.TempDir(),
)
serverErr := make(chan error, 1)
go func() {
serverErr <- root.ExecuteContext(ctx)
}()
_ = waitAccessURL(t, cfg)

var res *http.Response
require.Eventually(t, func() bool {
req, err := http.NewRequestWithContext(ctx, "GET", fmt.Sprintf("http://127.0.0.1:%d", randomPort), nil)
assert.NoError(t, err)
res, err = http.DefaultClient.Do(req)
return err == nil
}, testutil.WaitShort, testutil.IntervalFast)

scanner := bufio.NewScanner(res.Body)
hasActiveUsers := false
for scanner.Scan() {
// This metric is manually registered to be tracked in the server. That's
// why we test it's tracked here.
if strings.HasPrefix(scanner.Text(), "coderd_api_active_users_duration_hour") {
hasActiveUsers = true
continue
}
}
require.NoError(t, scanner.Err())
require.True(t, hasActiveUsers)
cancelFunc()
<-serverErr
})
}

func generateTLSCertificate(t testing.TB) (certPath, keyPath string) {
Expand Down
7 changes: 6 additions & 1 deletion coderd/coderd.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"github.com/go-chi/chi/v5/middleware"
"github.com/klauspost/compress/zstd"
"github.com/pion/webrtc/v3"
"github.com/prometheus/client_golang/prometheus"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
"golang.org/x/xerrors"
"google.golang.org/api/idtoken"
Expand Down Expand Up @@ -58,6 +59,7 @@ type Options struct {
GoogleTokenValidator *idtoken.Validator
GithubOAuth2Config *GithubOAuth2Config
OIDCConfig *OIDCConfig
PrometheusRegistry *prometheus.Registry
ICEServers []webrtc.ICEServer
SecureAuthCookie bool
SSHKeygenAlgorithm gitsshkey.Algorithm
Expand Down Expand Up @@ -87,6 +89,9 @@ func New(options *Options) *API {
panic(xerrors.Errorf("rego authorize panic: %w", err))
}
}
if options.PrometheusRegistry == nil {
options.PrometheusRegistry = prometheus.NewRegistry()
}

siteCacheDir := options.CacheDir
if siteCacheDir != "" {
Expand Down Expand Up @@ -116,7 +121,7 @@ func New(options *Options) *API {
next.ServeHTTP(middleware.NewWrapResponseWriter(w, r.ProtoMajor), r)
})
},
httpmw.Prometheus,
httpmw.Prometheus(options.PrometheusRegistry),
tracing.HTTPMW(api.TracerProvider, "coderd.http"),
)

Expand Down
88 changes: 45 additions & 43 deletions coderd/httpmw/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,31 @@ import (
"github.com/prometheus/client_golang/prometheus/promauto"
)

var (
requestsProcessed = promauto.NewCounterVec(prometheus.CounterOpts{
func durationToFloatMs(d time.Duration) float64 {
return float64(d.Milliseconds())
}

func Prometheus(register prometheus.Registerer) func(http.Handler) http.Handler {
factory := promauto.With(register)
requestsProcessed := factory.NewCounterVec(prometheus.CounterOpts{
Namespace: "coderd",
Subsystem: "api",
Name: "requests_processed_total",
Help: "The total number of processed API requests",
}, []string{"code", "method", "path"})
requestsConcurrent = promauto.NewGauge(prometheus.GaugeOpts{
requestsConcurrent := factory.NewGauge(prometheus.GaugeOpts{
Namespace: "coderd",
Subsystem: "api",
Name: "concurrent_requests",
Help: "The number of concurrent API requests",
})
websocketsConcurrent = promauto.NewGauge(prometheus.GaugeOpts{
websocketsConcurrent := factory.NewGauge(prometheus.GaugeOpts{
Namespace: "coderd",
Subsystem: "api",
Name: "concurrent_websockets",
Help: "The total number of concurrent API websockets",
})
websocketsDist = promauto.NewHistogramVec(prometheus.HistogramOpts{
websocketsDist := factory.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "coderd",
Subsystem: "api",
Name: "websocket_durations_ms",
Expand All @@ -45,58 +50,55 @@ var (
durationToFloatMs(30 * time.Hour),
},
}, []string{"path"})
requestsDist = promauto.NewHistogramVec(prometheus.HistogramOpts{
requestsDist := factory.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "coderd",
Subsystem: "api",
Name: "request_latencies_ms",
Help: "Latency distribution of requests in milliseconds",
Buckets: []float64{1, 5, 10, 25, 50, 100, 500, 1000, 5000, 10000, 30000},
}, []string{"method", "path"})
)

func durationToFloatMs(d time.Duration) float64 {
return float64(d.Milliseconds())
}
return func(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
var (
start = time.Now()
method = r.Method
rctx = chi.RouteContext(r.Context())
)

func Prometheus(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
var (
start = time.Now()
method = r.Method
rctx = chi.RouteContext(r.Context())
)
sw, ok := w.(chimw.WrapResponseWriter)
if !ok {
panic("dev error: http.ResponseWriter is not chimw.WrapResponseWriter")
}
sw, ok := w.(chimw.WrapResponseWriter)
if !ok {
panic("dev error: http.ResponseWriter is not chimw.WrapResponseWriter")
}

var (
dist *prometheus.HistogramVec
distOpts []string
)
// We want to count websockets separately.
if isWebsocketUpgrade(r) {
websocketsConcurrent.Inc()
defer websocketsConcurrent.Dec()
var (
dist *prometheus.HistogramVec
distOpts []string
)
// We want to count WebSockets separately.
if isWebsocketUpgrade(r) {
websocketsConcurrent.Inc()
defer websocketsConcurrent.Dec()

dist = websocketsDist
} else {
requestsConcurrent.Inc()
defer requestsConcurrent.Dec()
dist = websocketsDist
} else {
requestsConcurrent.Inc()
defer requestsConcurrent.Dec()

dist = requestsDist
distOpts = []string{method}
}
dist = requestsDist
distOpts = []string{method}
}

next.ServeHTTP(w, r)
next.ServeHTTP(w, r)

path := rctx.RoutePattern()
distOpts = append(distOpts, path)
statusStr := strconv.Itoa(sw.Status())
path := rctx.RoutePattern()
distOpts = append(distOpts, path)
statusStr := strconv.Itoa(sw.Status())

requestsProcessed.WithLabelValues(statusStr, method, path).Inc()
dist.WithLabelValues(distOpts...).Observe(float64(time.Since(start)) / 1e6)
})
requestsProcessed.WithLabelValues(statusStr, method, path).Inc()
dist.WithLabelValues(distOpts...).Observe(float64(time.Since(start)) / 1e6)
})
}
}

func isWebsocketUpgrade(r *http.Request) bool {
Expand Down
31 changes: 31 additions & 0 deletions coderd/httpmw/prometheus_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package httpmw_test

import (
"context"
"net/http"
"net/http/httptest"
"testing"

"github.com/go-chi/chi/v5"
chimw "github.com/go-chi/chi/v5/middleware"
"github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/require"

"github.com/coder/coder/coderd/httpmw"
)

func TestPrometheus(t *testing.T) {
t.Parallel()
t.Run("All", func(t *testing.T) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the tests 😎

req := httptest.NewRequest("GET", "/", nil)
req = req.WithContext(context.WithValue(req.Context(), chi.RouteCtxKey, chi.NewRouteContext()))
res := chimw.NewWrapResponseWriter(httptest.NewRecorder(), 0)
reg := prometheus.NewRegistry()
httpmw.Prometheus(reg)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
})).ServeHTTP(res, req)
metrics, err := reg.Gather()
require.NoError(t, err)
require.Greater(t, len(metrics), 0)
})
}
Loading