Skip to content

chore: Refactor accepting websocket connections to track for close #7008

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 95 additions & 0 deletions coderd/activewebsockets/sockets.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
package activewebsockets

import (
"context"
"net/http"
"runtime/pprof"
"sync"

"nhooyr.io/websocket"

"github.com/coder/coder/coderd/httpapi"
"github.com/coder/coder/codersdk"
)

// Active tracks in-flight websocket handlers so that they can be shut down
// as a group. Every connection accepted through Accept is closed once the
// context handed to New is canceled or Close is called.
type Active struct {
	ctx    context.Context
	cancel func()

	// mu makes the "still accepting?" check and wg.Add in Accept atomic with
	// respect to the cancel in Close. Without it a request could pass the
	// check, Close could cancel and return from wg.Wait with a zero counter,
	// and the request would then call wg.Add and run untracked.
	mu sync.Mutex
	wg sync.WaitGroup
}

// New returns an Active whose lifetime is bound to ctx. Canceling ctx has the
// same effect on tracked connections as calling Close, except that nothing
// waits for the handlers to return.
func New(ctx context.Context) *Active {
	ctx, cancel := context.WithCancel(ctx)
	return &Active{
		ctx:    ctx,
		cancel: cancel,
	}
}

// Accept upgrades the request to a websocket and invokes f with the resulting
// connection. Accept blocks until f returns; the connection is closed when f
// returns or when the Active's context is canceled, whichever happens first.
//
// Steps:
//  1. Atomically verify we are not shutting down and register with the wait
//     group. If we are shutting down, an error response is written and f is
//     never called.
//  2. Accept the websocket connection. On failure an error response is
//     written and f is never called.
//  3. Launch a goroutine that closes the connection when the context ends.
//  4. Call f with the websocket connection.
func (a *Active) Accept(rw http.ResponseWriter, r *http.Request, options *websocket.AcceptOptions, f func(conn *websocket.Conn)) {
	// Ensure we are still accepting websocket connections, and not shutting down.
	if err := a.track(); err != nil {
		httpapi.Write(context.Background(), rw, http.StatusBadRequest, codersdk.Response{
			Message: "No longer accepting websocket requests.",
			Detail:  err.Error(),
		})
		return
	}
	// Registered by track; Close waits on this.
	defer a.wg.Done()

	// Accept the websocket connection.
	conn, err := websocket.Accept(rw, r, options)
	if err != nil {
		httpapi.Write(context.Background(), rw, http.StatusBadRequest, codersdk.Response{
			Message: "Failed to accept websocket.",
			Detail:  err.Error(),
		})
		return
	}

	// Always start watching the connection before handing it to the caller.
	// The derived context ends when the Active's context is canceled or when
	// f returns (via the deferred cancel), so the watcher goroutine never
	// outlives this call by more than the time it takes to close conn.
	ctx, cancel := context.WithCancel(a.ctx)
	defer cancel()
	closeConnOnContext(ctx, conn)

	// Handle the websocket connection.
	f(conn)
}

// track registers one handler with the wait group unless the Active is
// already shutting down, in which case it returns the context's error and
// registers nothing. On a nil return the caller must call a.wg.Done exactly
// once.
func (a *Active) track() error {
	a.mu.Lock()
	defer a.mu.Unlock()

	if err := a.ctx.Err(); err != nil {
		return err
	}
	a.wg.Add(1)
	return nil
}

// closeConnOnContext launches a goroutine that waits for ctx to end and then
// closes conn with a normal-closure status.
func closeConnOnContext(ctx context.Context, conn *websocket.Conn) {
	// Labeling the goroutine for goroutine dumps/debugging.
	go pprof.Do(ctx, pprof.Labels("service", "ActiveWebsockets"), func(ctx context.Context) {
		<-ctx.Done()
		// Best effort: the handler may already have closed the connection,
		// in which case the error carries no useful information.
		_ = conn.Close(websocket.StatusNormalClosure, "")
	})
}

// Close stops accepting new websocket connections, closes every active one,
// and waits for their handlers to return.
func (a *Active) Close() {
	// Cancel under the lock so that every Accept either registered with the
	// wait group before this point or observes the canceled context.
	a.mu.Lock()
	a.cancel()
	a.mu.Unlock()

	a.wg.Wait()
}
14 changes: 5 additions & 9 deletions coderd/coderd.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import (
"path/filepath"
"regexp"
"strings"
"sync"
"sync/atomic"
"time"

Expand All @@ -39,6 +38,7 @@ import (
"github.com/coder/coder/buildinfo"

// Used to serve the Swagger endpoint
"github.com/coder/coder/coderd/activewebsockets"
_ "github.com/coder/coder/coderd/apidoc"
"github.com/coder/coder/coderd/audit"
"github.com/coder/coder/coderd/awsidentity"
Expand Down Expand Up @@ -316,6 +316,7 @@ func New(options *Options) *API {
TemplateScheduleStore: options.TemplateScheduleStore,
Experiments: experiments,
healthCheckGroup: &singleflight.Group[string, *healthcheck.Report]{},
WebsocketWatch: activewebsockets.New(ctx),
}
if options.UpdateCheckOptions != nil {
api.updateChecker = updatecheck.New(
Expand Down Expand Up @@ -355,7 +356,7 @@ func New(options *Options) *API {
apiRateLimiter := httpmw.RateLimit(options.APIRateLimit, time.Minute)

derpHandler := derphttp.Handler(api.DERPServer)
derpHandler, api.derpCloseFunc = tailnet.WithWebsocketSupport(api.DERPServer, derpHandler)
derpHandler = tailnet.WithWebsocketSupport(api.WebsocketWatch.Accept, api.DERPServer, derpHandler)

r.Use(
httpmw.Recover(api.Logger),
Expand Down Expand Up @@ -784,9 +785,7 @@ type API struct {

siteHandler http.Handler

WebsocketWaitMutex sync.Mutex
WebsocketWaitGroup sync.WaitGroup
derpCloseFunc func()
WebsocketWatch *activewebsockets.Active

metricsCache *metricscache.Cache
workspaceAgentCache *wsconncache.Cache
Expand All @@ -803,11 +802,8 @@ type API struct {
// Close waits for all WebSocket connections to drain before returning.
func (api *API) Close() error {
api.cancel()
api.derpCloseFunc()

api.WebsocketWaitMutex.Lock()
api.WebsocketWaitGroup.Wait()
api.WebsocketWaitMutex.Unlock()
api.WebsocketWatch.Close()

api.metricsCache.Close()
if api.updateChecker != nil {
Expand Down
11 changes: 8 additions & 3 deletions coderd/healthcheck/derp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"tailscale.com/tailcfg"
"tailscale.com/types/key"

"github.com/coder/coder/coderd/activewebsockets"
"github.com/coder/coder/coderd/healthcheck"
"github.com/coder/coder/tailnet"
)
Expand Down Expand Up @@ -124,10 +125,15 @@ func TestDERP(t *testing.T) {
t.Run("ForceWebsockets", func(t *testing.T) {
t.Parallel()

ctx, cancel := context.WithCancel(context.Background())
defer cancel()

derpSrv := derp.NewServer(key.NewNode(), func(format string, args ...any) { t.Logf(format, args...) })
defer derpSrv.Close()
handler, closeHandler := tailnet.WithWebsocketSupport(derpSrv, derphttp.Handler(derpSrv))
defer closeHandler()

sockets := activewebsockets.New(ctx)
handler := tailnet.WithWebsocketSupport(sockets.Accept, derpSrv, derphttp.Handler(derpSrv))
defer sockets.Close()

srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Header.Get("Upgrade") == "DERP" {
Expand All @@ -140,7 +146,6 @@ func TestDERP(t *testing.T) {
}))

var (
ctx = context.Background()
report = healthcheck.DERPReport{}
derpURL, _ = url.Parse(srv.URL)
opts = &healthcheck.DERPReportOptions{
Expand Down
96 changes: 43 additions & 53 deletions coderd/provisionerjobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,71 +113,61 @@ func (api *API) provisionerJobLogs(rw http.ResponseWriter, r *http.Request, job
logs = []database.ProvisionerJobLog{}
}

api.WebsocketWaitMutex.Lock()
api.WebsocketWaitGroup.Add(1)
api.WebsocketWaitMutex.Unlock()
defer api.WebsocketWaitGroup.Done()
conn, err := websocket.Accept(rw, r, nil)
if err != nil {
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
Message: "Failed to accept websocket.",
Detail: err.Error(),
})
return
}
go httpapi.Heartbeat(ctx, conn)
api.WebsocketWatch.Accept(rw, r, nil, func(conn *websocket.Conn) {
go httpapi.Heartbeat(ctx, conn)

ctx, wsNetConn := websocketNetConn(ctx, conn, websocket.MessageText)
defer wsNetConn.Close() // Also closes conn.
ctx, wsNetConn := websocketNetConn(ctx, conn, websocket.MessageText)
defer wsNetConn.Close() // Also closes conn.

logIdsDone := make(map[int64]bool)
logIdsDone := make(map[int64]bool)

// The Go stdlib JSON encoder appends a newline character after message write.
encoder := json.NewEncoder(wsNetConn)
for _, provisionerJobLog := range logs {
logIdsDone[provisionerJobLog.ID] = true
err = encoder.Encode(convertProvisionerJobLog(provisionerJobLog))
// The Go stdlib JSON encoder appends a newline character after message write.
encoder := json.NewEncoder(wsNetConn)
for _, provisionerJobLog := range logs {
logIdsDone[provisionerJobLog.ID] = true
err = encoder.Encode(convertProvisionerJobLog(provisionerJobLog))
if err != nil {
return
}
}
job, err = api.Database.GetProvisionerJobByID(ctx, job.ID)
if err != nil {
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
Message: "Internal error fetching provisioner job.",
Detail: err.Error(),
})
return
}
}
job, err = api.Database.GetProvisionerJobByID(ctx, job.ID)
if err != nil {
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
Message: "Internal error fetching provisioner job.",
Detail: err.Error(),
})
return
}
if job.CompletedAt.Valid {
// job was complete before we queried the database for historical logs
return
}

for {
select {
case <-ctx.Done():
logger.Debug(context.Background(), "job logs context canceled")
if job.CompletedAt.Valid {
// job was complete before we queried the database for historical logs
return
case log, ok := <-bufferedLogs:
// A nil log is sent when complete!
if !ok || log == nil {
logger.Debug(context.Background(), "reached the end of published logs")
}

for {
select {
case <-ctx.Done():
logger.Debug(context.Background(), "job logs context canceled")
return
}
if logIdsDone[log.ID] {
logger.Debug(ctx, "subscribe duplicated log",
slog.F("stage", log.Stage))
} else {
logger.Debug(ctx, "subscribe encoding log",
slog.F("stage", log.Stage))
err = encoder.Encode(convertProvisionerJobLog(*log))
if err != nil {
case log, ok := <-bufferedLogs:
// A nil log is sent when complete!
if !ok || log == nil {
logger.Debug(context.Background(), "reached the end of published logs")
return
}
if logIdsDone[log.ID] {
logger.Debug(ctx, "subscribe duplicated log",
slog.F("stage", log.Stage))
} else {
logger.Debug(ctx, "subscribe encoding log",
slog.F("stage", log.Stage))
err = encoder.Encode(convertProvisionerJobLog(*log))
if err != nil {
return
}
}
}
}
}
})
}

func (api *API) provisionerJobResources(rw http.ResponseWriter, r *http.Request, job database.ProvisionerJob) {
Expand Down
Loading