Skip to content

Commit 77acf0c

Browse files
authored
feat: provisionerd tracing, add public trace ingestion (coder#4070)
1 parent fc84189 commit 77acf0c

File tree

13 files changed

+377
-165
lines changed

13 files changed

+377
-165
lines changed

cli/server.go

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ import (
3434
"github.com/prometheus/client_golang/prometheus/promhttp"
3535
"github.com/spf13/afero"
3636
"github.com/spf13/cobra"
37-
sdktrace "go.opentelemetry.io/otel/sdk/trace"
37+
"go.opentelemetry.io/otel/trace"
3838
"golang.org/x/oauth2"
3939
xgithub "golang.org/x/oauth2/github"
4040
"golang.org/x/sync/errgroup"
@@ -115,7 +115,7 @@ func Server(newAPI func(*coderd.Options) *coderd.API) *cobra.Command {
115115
turnRelayAddress string
116116
tunnel bool
117117
stunServers []string
118-
trace bool
118+
traceEnable bool
119119
secureAuthCookie bool
120120
sshKeygenAlgorithmRaw string
121121
autoImportTemplates []string
@@ -159,26 +159,32 @@ func Server(newAPI func(*coderd.Options) *coderd.API) *cobra.Command {
159159
defer http.DefaultClient.CloseIdleConnections()
160160

161161
var (
162-
tracerProvider *sdktrace.TracerProvider
162+
tracerProvider trace.TracerProvider
163163
err error
164164
sqlDriver = "postgres"
165165
)
166-
if trace {
167-
tracerProvider, err = tracing.TracerProvider(ctx, "coderd")
166+
167+
if traceEnable || telemetryEnable {
168+
sdkTracerProvider, err := tracing.TracerProvider(ctx, "coderd", tracing.TracerOpts{
169+
Default: traceEnable,
170+
Coder: telemetryEnable && !isTest(),
171+
})
168172
if err != nil {
169-
logger.Warn(ctx, "failed to start telemetry exporter", slog.Error(err))
173+
logger.Warn(ctx, "start telemetry exporter", slog.Error(err))
170174
} else {
171175
// allow time for traces to flush even if command context is canceled
172176
defer func() {
173-
_ = shutdownWithTimeout(tracerProvider, 5*time.Second)
177+
_ = shutdownWithTimeout(sdkTracerProvider, 5*time.Second)
174178
}()
175179

176-
d, err := tracing.PostgresDriver(tracerProvider, "coderd.database")
180+
d, err := tracing.PostgresDriver(sdkTracerProvider, "coderd.database")
177181
if err != nil {
178-
logger.Warn(ctx, "failed to start postgres tracing driver", slog.Error(err))
182+
logger.Warn(ctx, "start postgres tracing driver", slog.Error(err))
179183
} else {
180184
sqlDriver = d
181185
}
186+
187+
tracerProvider = sdkTracerProvider
182188
}
183189
}
184190

@@ -838,7 +844,7 @@ func Server(newAPI func(*coderd.Options) *coderd.API) *cobra.Command {
838844
cliflag.StringArrayVarP(root.Flags(), &stunServers, "stun-server", "", "CODER_STUN_SERVERS", []string{
839845
"stun:stun.l.google.com:19302",
840846
}, "Specify URLs for STUN servers to enable P2P connections.")
841-
cliflag.BoolVarP(root.Flags(), &trace, "trace", "", "CODER_TRACE", false, "Specifies if application tracing data is collected")
847+
cliflag.BoolVarP(root.Flags(), &traceEnable, "trace", "", "CODER_TRACE", false, "Specifies if application tracing data is collected")
842848
cliflag.StringVarP(root.Flags(), &turnRelayAddress, "turn-relay-address", "", "CODER_TURN_RELAY_ADDRESS", "127.0.0.1",
843849
"Specifies the address to bind TURN connections.")
844850
cliflag.BoolVarP(root.Flags(), &secureAuthCookie, "secure-auth-cookie", "", "CODER_SECURE_AUTH_COOKIE", false, "Specifies if the 'Secure' property is set on browser session cookies")
@@ -915,8 +921,13 @@ func shutdownWithTimeout(s interface{ Shutdown(context.Context) error }, timeout
915921
}
916922

917923
// nolint:revive
918-
func newProvisionerDaemon(ctx context.Context, coderAPI *coderd.API,
919-
logger slog.Logger, cacheDir string, errCh chan error, dev bool,
924+
func newProvisionerDaemon(
925+
ctx context.Context,
926+
coderAPI *coderd.API,
927+
logger slog.Logger,
928+
cacheDir string,
929+
errCh chan error,
930+
dev bool,
920931
) (srv *provisionerd.Server, err error) {
921932
ctx, cancel := context.WithCancel(ctx)
922933
defer func() {
@@ -989,6 +1000,7 @@ func newProvisionerDaemon(ctx context.Context, coderAPI *coderd.API,
9891000
UpdateInterval: 500 * time.Millisecond,
9901001
Provisioners: provisioners,
9911002
WorkDirectory: tempDir,
1003+
Tracer: coderAPI.TracerProvider,
9921004
}), nil
9931005
}
9941006

coderd/coderd.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ import (
1515
"github.com/klauspost/compress/zstd"
1616
"github.com/pion/webrtc/v3"
1717
"github.com/prometheus/client_golang/prometheus"
18-
sdktrace "go.opentelemetry.io/otel/sdk/trace"
18+
"go.opentelemetry.io/otel/trace"
1919
"golang.org/x/xerrors"
2020
"google.golang.org/api/idtoken"
2121
"tailscale.com/derp"
@@ -70,7 +70,7 @@ type Options struct {
7070
SSHKeygenAlgorithm gitsshkey.Algorithm
7171
Telemetry telemetry.Reporter
7272
TURNServer *turnconn.Server
73-
TracerProvider *sdktrace.TracerProvider
73+
TracerProvider trace.TracerProvider
7474
AutoImportTemplates []AutoImportTemplate
7575
LicenseHandler http.Handler
7676
FeaturesService features.Service

coderd/tracing/exporter.go

Lines changed: 55 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,37 +5,74 @@ import (
55

66
"go.opentelemetry.io/otel/exporters/otlp/otlptrace"
77
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
8+
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
89
"go.opentelemetry.io/otel/sdk/resource"
910
sdktrace "go.opentelemetry.io/otel/sdk/trace"
10-
semconv "go.opentelemetry.io/otel/semconv/v1.4.0"
11+
semconv "go.opentelemetry.io/otel/semconv/v1.10.0"
1112
"golang.org/x/xerrors"
1213
)
1314

15+
// TracerOpts specifies which trace exporters TracerProvider should configure.
16+
type TracerOpts struct {
17+
// Default exports to an OTLP backend configured by environment variables. See:
18+
// https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/exporter.md
19+
Default bool
20+
// Coder exports traces to Coder's public tracing ingest service, which is used
21+
// to improve the product. It is disabled when opting out of telemetry.
22+
Coder bool
23+
}
24+
1425
// TracerProvider configures a trace provider that exports through the OTLP exporters selected by opts.
1526
// Caller is responsible for calling TracerProvider.Shutdown to ensure all data is flushed.
16-
func TracerProvider(ctx context.Context, service string) (*sdktrace.TracerProvider, error) {
17-
res, err := resource.New(ctx,
18-
resource.WithAttributes(
19-
// the service name used to display traces in backends
20-
semconv.ServiceNameKey.String(service),
21-
),
27+
func TracerProvider(ctx context.Context, service string, opts TracerOpts) (*sdktrace.TracerProvider, error) {
28+
res := resource.NewWithAttributes(
29+
semconv.SchemaURL,
30+
// the service name used to display traces in backends
31+
semconv.ServiceNameKey.String(service),
2232
)
23-
if err != nil {
24-
return nil, xerrors.Errorf("creating otlp resource: %w", err)
33+
34+
tracerOpts := []sdktrace.TracerProviderOption{
35+
sdktrace.WithResource(res),
2536
}
37+
if opts.Default {
38+
exporter, err := DefaultExporter(ctx)
39+
if err != nil {
40+
return nil, xerrors.Errorf("default exporter: %w", err)
41+
}
42+
tracerOpts = append(tracerOpts, sdktrace.WithBatcher(exporter))
43+
}
44+
if opts.Coder {
45+
exporter, err := CoderExporter(ctx)
46+
if err != nil {
47+
return nil, xerrors.Errorf("coder exporter: %w", err)
48+
}
49+
tracerOpts = append(tracerOpts, sdktrace.WithBatcher(exporter))
50+
}
51+
52+
tracerProvider := sdktrace.NewTracerProvider(tracerOpts...)
53+
54+
return tracerProvider, nil
55+
}
2656

27-
// By default we send span data to a local otel collector.
28-
// The endpoint we push to can be configured with env vars.
29-
// See https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/exporter.md
57+
func DefaultExporter(ctx context.Context) (*otlptrace.Exporter, error) {
3058
exporter, err := otlptrace.New(ctx, otlptracegrpc.NewClient(otlptracegrpc.WithInsecure()))
3159
if err != nil {
32-
return nil, xerrors.Errorf("creating otlp exporter: %w", err)
60+
return nil, xerrors.Errorf("create otlp exporter: %w", err)
3361
}
3462

35-
tracerProvider := sdktrace.NewTracerProvider(
36-
sdktrace.WithBatcher(exporter),
37-
sdktrace.WithResource(res),
38-
)
63+
return exporter, nil
64+
}
3965

40-
return tracerProvider, nil
66+
func CoderExporter(ctx context.Context) (*otlptrace.Exporter, error) {
67+
opts := []otlptracehttp.Option{
68+
otlptracehttp.WithEndpoint("oss-otel-ingest-http.coder.app:443"),
69+
otlptracehttp.WithCompression(otlptracehttp.GzipCompression),
70+
}
71+
72+
exporter, err := otlptrace.New(ctx, otlptracehttp.NewClient(opts...))
73+
if err != nil {
74+
return nil, xerrors.Errorf("create otlp exporter: %w", err)
75+
}
76+
77+
return exporter, nil
4178
}

coderd/tracing/httpmw.go

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,14 @@ import (
55
"net/http"
66

77
"github.com/go-chi/chi/v5"
8-
sdktrace "go.opentelemetry.io/otel/sdk/trace"
98
semconv "go.opentelemetry.io/otel/semconv/v1.10.0"
109
"go.opentelemetry.io/otel/trace"
1110

1211
"github.com/coder/coder/coderd/httpapi"
1312
)
1413

1514
// HTTPMW adds tracing to http routes.
16-
func HTTPMW(tracerProvider *sdktrace.TracerProvider, name string) func(http.Handler) http.Handler {
15+
func HTTPMW(tracerProvider trace.TracerProvider, name string) func(http.Handler) http.Handler {
1716
return func(next http.Handler) http.Handler {
1817
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
1918
if tracerProvider == nil {
@@ -34,20 +33,15 @@ func HTTPMW(tracerProvider *sdktrace.TracerProvider, name string) func(http.Hand
3433
// pass the span through the request context and serve the request to the next middleware
3534
next.ServeHTTP(sw, r)
3635
// capture response data
37-
EndHTTPSpan(r, sw.Status)
36+
EndHTTPSpan(r, sw.Status, span)
3837
})
3938
}
4039
}
4140

4241
// EndHTTPSpan captures request and response data after the handler is done.
43-
func EndHTTPSpan(r *http.Request, status int) {
44-
span := trace.SpanFromContext(r.Context())
45-
42+
func EndHTTPSpan(r *http.Request, status int, span trace.Span) {
4643
// set the resource name as we get it only once the handler is executed
4744
route := chi.RouteContext(r.Context()).RoutePattern()
48-
if route != "" {
49-
span.SetName(fmt.Sprintf("%s %s", r.Method, route))
50-
}
5145
span.SetName(fmt.Sprintf("%s %s", r.Method, route))
5246
span.SetAttributes(semconv.NetAttributesFromHTTPRequest("tcp", r)...)
5347
span.SetAttributes(semconv.EndUserAttributesFromHTTPRequest(r)...)

coderd/tracing/postgres.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import (
66
"strings"
77

88
"github.com/nhatthm/otelsql"
9-
semconv "go.opentelemetry.io/otel/semconv/v1.7.0"
9+
semconv "go.opentelemetry.io/otel/semconv/v1.10.0"
1010
"go.opentelemetry.io/otel/trace"
1111
"golang.org/x/xerrors"
1212
)

coderd/tracing/util.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
package tracing
2+
3+
import (
4+
"runtime"
5+
"strings"
6+
)
7+
8+
// FuncName returns the package-qualified name of the function that called it,
// for example "tracing_test.TestFuncName" or "tracing_test.(*foo).baz". Any
// import path up to and including the final "/" is stripped. It returns an
// empty string when the caller cannot be determined.
func FuncName() string {
	// Skip runtime.Callers and FuncName itself so the single recorded frame is
	// our caller.
	var pcs [1]uintptr
	if runtime.Callers(2, pcs[:]) == 0 {
		return ""
	}
	// Resolve the name through CallersFrames instead of FuncForPC: the runtime
	// documents FuncForPC as unable to account for inlining when handed a PC
	// taken from the call stack, whereas Frame.Function names the logical
	// caller even when it has been inlined.
	frame, _ := runtime.CallersFrames(pcs[:]).Next()
	name := frame.Function
	if i := strings.LastIndex(name, "/"); i >= 0 {
		name = name[i+1:]
	}
	return name
}

coderd/tracing/util_test.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
package tracing_test
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/assert"
7+
8+
"github.com/coder/coder/coderd/tracing"
9+
)
10+
11+
// t.Parallel affects the result of these tests.
12+
13+
//nolint:paralleltest
14+
func TestFuncName(t *testing.T) {
15+
fn := tracing.FuncName()
16+
assert.Equal(t, "tracing_test.TestFuncName", fn)
17+
}
18+
19+
type foo struct{}
20+
21+
func (foo) bar() string {
22+
return tracing.FuncName()
23+
}
24+
25+
//nolint:paralleltest
26+
func TestFuncNameMethod(t *testing.T) {
27+
fn := foo{}.bar()
28+
assert.Equal(t, "tracing_test.foo.bar", fn)
29+
}
30+
31+
func (*foo) baz() string {
32+
return tracing.FuncName()
33+
}
34+
35+
//nolint:paralleltest
36+
func TestFuncNameMethodPointer(t *testing.T) {
37+
fn := (&foo{}).baz()
38+
assert.Equal(t, "tracing_test.(*foo).baz", fn)
39+
}

coderd/workspaceagents.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616

1717
"github.com/google/uuid"
1818
"github.com/hashicorp/yamux"
19+
"go.opentelemetry.io/otel/trace"
1920
"golang.org/x/mod/semver"
2021
"golang.org/x/xerrors"
2122
"nhooyr.io/websocket"
@@ -113,7 +114,7 @@ func (api *API) workspaceAgentDial(rw http.ResponseWriter, r *http.Request) {
113114
}
114115

115116
// end span so we don't get long lived trace data
116-
tracing.EndHTTPSpan(r, 200)
117+
tracing.EndHTTPSpan(r, http.StatusOK, trace.SpanFromContext(ctx))
117118

118119
err = peerbroker.ProxyListen(ctx, session, peerbroker.ProxyOptions{
119120
ChannelID: workspaceAgent.ID.String(),
@@ -309,7 +310,7 @@ func (api *API) workspaceAgentListen(rw http.ResponseWriter, r *http.Request) {
309310
}
310311

311312
// end span so we don't get long lived trace data
312-
tracing.EndHTTPSpan(r, 200)
313+
tracing.EndHTTPSpan(r, http.StatusOK, trace.SpanFromContext(ctx))
313314

314315
api.Logger.Info(ctx, "accepting agent", slog.F("resource", resource), slog.F("agent", workspaceAgent))
315316

@@ -398,8 +399,9 @@ func (api *API) workspaceAgentTurn(rw http.ResponseWriter, r *http.Request) {
398399
}
399400

400401
ctx, wsNetConn := websocketNetConn(r.Context(), wsConn, websocket.MessageBinary)
401-
defer wsNetConn.Close() // Also closes conn.
402-
tracing.EndHTTPSpan(r, 200) // end span so we don't get long lived trace data
402+
defer wsNetConn.Close() // Also closes conn.
403+
// end span so we don't get long lived trace data
404+
tracing.EndHTTPSpan(r, http.StatusOK, trace.SpanFromContext(ctx))
403405

404406
api.Logger.Debug(ctx, "accepting turn connection", slog.F("remote-address", r.RemoteAddr), slog.F("local-address", localAddress))
405407
select {

coderd/workspaceapps.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"strings"
1010

1111
"github.com/go-chi/chi/v5"
12+
"go.opentelemetry.io/otel/trace"
1213

1314
"github.com/coder/coder/coderd/database"
1415
"github.com/coder/coder/coderd/httpapi"
@@ -125,6 +126,7 @@ type proxyApplication struct {
125126
}
126127

127128
func (api *API) proxyWorkspaceApplication(proxyApp proxyApplication, rw http.ResponseWriter, r *http.Request) {
129+
ctx := r.Context()
128130
if !api.Authorize(r, rbac.ActionCreate, proxyApp.Workspace.ExecutionRBAC()) {
129131
httpapi.ResourceNotFound(rw)
130132
return
@@ -138,7 +140,7 @@ func (api *API) proxyWorkspaceApplication(proxyApp proxyApplication, rw http.Res
138140
// If the app name was used instead, fetch the app from the database so we
139141
// can get the internal URL.
140142
if proxyApp.AppName != "" {
141-
app, err := api.Database.GetWorkspaceAppByAgentIDAndName(r.Context(), database.GetWorkspaceAppByAgentIDAndNameParams{
143+
app, err := api.Database.GetWorkspaceAppByAgentIDAndName(ctx, database.GetWorkspaceAppByAgentIDAndNameParams{
142144
AgentID: proxyApp.Agent.ID,
143145
Name: proxyApp.AppName,
144146
})
@@ -195,7 +197,7 @@ func (api *API) proxyWorkspaceApplication(proxyApp proxyApplication, rw http.Res
195197
if proxyApp.DashboardOnError {
196198
// To pass friendly errors to the frontend, special meta tags are
197199
// overridden in the index.html with the content passed here.
198-
r = r.WithContext(site.WithAPIResponse(r.Context(), site.APIResponse{
200+
r = r.WithContext(site.WithAPIResponse(ctx, site.APIResponse{
199201
StatusCode: http.StatusBadGateway,
200202
Message: err.Error(),
201203
}))
@@ -228,7 +230,7 @@ func (api *API) proxyWorkspaceApplication(proxyApp proxyApplication, rw http.Res
228230
proxy.Transport = conn.HTTPTransport()
229231

230232
// end span so we don't get long lived trace data
231-
tracing.EndHTTPSpan(r, 200)
233+
tracing.EndHTTPSpan(r, http.StatusOK, trace.SpanFromContext(ctx))
232234

233235
proxy.ServeHTTP(rw, r)
234236
}

0 commit comments

Comments
 (0)