Skip to content

Commit 8b66a5a

Browse files
chore(codersdk/workspacesdk): make dialer fail fast for authnz errors (#19173)
Closes #18599. The linked issue was created due to me assuming the dialer didn't fail fast at all. In reality, it does fail fast, but only for a select few status codes. Auth[n|z] errors aren't any of those status codes, despite being 'permanent' in the same way a `400` is. This PR makes 401* and 403 'permanent' errors, meaning the dialer will give up immediately after receiving them from coderd. *One reason to receive a 401 is when the supplied resume_token is invalid. These are not permanent errors, and when we encounter those the dialer will retain the existing behaviour of unsetting the resume token and retrying.
1 parent 7eb4119 commit 8b66a5a

File tree

2 files changed

+70
-8
lines changed

2 files changed

+70
-8
lines changed

codersdk/workspacesdk/dialer.go

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ var permanentErrorStatuses = []int{
2424
http.StatusBadRequest, // returned if API mismatch
2525
http.StatusNotFound, // returned if user doesn't have permission or agent doesn't exist
2626
http.StatusInternalServerError, // returned if database is not reachable,
27+
http.StatusForbidden, // returned if user is not authorized
28+
// StatusUnauthorized is only a permanent error if the error is not due to
29+
// an invalid resume token. See `checkResumeTokenFailure`.
30+
http.StatusUnauthorized,
2731
}
2832

2933
type WebsocketDialer struct {
@@ -39,6 +43,24 @@ type WebsocketDialer struct {
3943
isFirst bool
4044
}
4145

46+
// checkResumeTokenFailure checks if the parsed error indicates a resume token failure
47+
// and updates the resumeTokenFailed flag accordingly. Returns true if a resume token
48+
// failure was detected.
49+
func (w *WebsocketDialer) checkResumeTokenFailure(ctx context.Context, sdkErr *codersdk.Error) bool {
50+
if sdkErr == nil {
51+
return false
52+
}
53+
54+
for _, v := range sdkErr.Validations {
55+
if v.Field == "resume_token" {
56+
w.logger.Warn(ctx, "failed to dial tailnet v2+ API: server replied invalid resume token; unsetting for next connection attempt")
57+
w.resumeTokenFailed = true
58+
return true
59+
}
60+
}
61+
return false
62+
}
63+
4264
type WebsocketDialerOption func(*WebsocketDialer)
4365

4466
func WithWorkspaceUpdates(req *proto.WorkspaceUpdatesRequest) WebsocketDialerOption {
@@ -82,9 +104,14 @@ func (w *WebsocketDialer) Dial(ctx context.Context, r tailnet.ResumeTokenControl
82104
if w.isFirst {
83105
if res != nil && slices.Contains(permanentErrorStatuses, res.StatusCode) {
84106
err = codersdk.ReadBodyAsError(res)
85-
// A bit more human-readable help in the case the API version was rejected
86107
var sdkErr *codersdk.Error
87108
if xerrors.As(err, &sdkErr) {
109+
// Check for resume token failure first
110+
if w.checkResumeTokenFailure(ctx, sdkErr) {
111+
return tailnet.ControlProtocolClients{}, err
112+
}
113+
114+
// A bit more human-readable help in the case the API version was rejected
88115
if sdkErr.Message == AgentAPIMismatchMessage &&
89116
sdkErr.StatusCode() == http.StatusBadRequest {
90117
sdkErr.Helper = fmt.Sprintf(
@@ -107,13 +134,8 @@ func (w *WebsocketDialer) Dial(ctx context.Context, r tailnet.ResumeTokenControl
107134
bodyErr := codersdk.ReadBodyAsError(res)
108135
var sdkErr *codersdk.Error
109136
if xerrors.As(bodyErr, &sdkErr) {
110-
for _, v := range sdkErr.Validations {
111-
if v.Field == "resume_token" {
112-
// Unset the resume token for the next attempt
113-
w.logger.Warn(ctx, "failed to dial tailnet v2+ API: server replied invalid resume token; unsetting for next connection attempt")
114-
w.resumeTokenFailed = true
115-
return tailnet.ControlProtocolClients{}, err
116-
}
137+
if w.checkResumeTokenFailure(ctx, sdkErr) {
138+
return tailnet.ControlProtocolClients{}, err
117139
}
118140
}
119141
if !errors.Is(err, context.Canceled) {

codersdk/workspacesdk/dialer_test.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,46 @@ func TestWebsocketDialer_ResumeTokenFailure(t *testing.T) {
270270
require.Error(t, err)
271271
}
272272

273+
func TestWebsocketDialer_UnauthenticatedFailFast(t *testing.T) {
274+
t.Parallel()
275+
ctx := testutil.Context(t, testutil.WaitShort)
276+
logger := slogtest.Make(t, &slogtest.Options{
277+
IgnoreErrors: true,
278+
}).Leveled(slog.LevelDebug)
279+
280+
svr := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
281+
httpapi.Write(ctx, w, http.StatusUnauthorized, codersdk.Response{})
282+
}))
283+
defer svr.Close()
284+
svrURL, err := url.Parse(svr.URL)
285+
require.NoError(t, err)
286+
287+
uut := workspacesdk.NewWebsocketDialer(logger, svrURL, &websocket.DialOptions{})
288+
289+
_, err = uut.Dial(ctx, nil)
290+
require.Error(t, err)
291+
}
292+
293+
func TestWebsocketDialer_UnauthorizedFailFast(t *testing.T) {
294+
t.Parallel()
295+
ctx := testutil.Context(t, testutil.WaitShort)
296+
logger := slogtest.Make(t, &slogtest.Options{
297+
IgnoreErrors: true,
298+
}).Leveled(slog.LevelDebug)
299+
300+
svr := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
301+
httpapi.Write(ctx, w, http.StatusUnauthorized, codersdk.Response{})
302+
}))
303+
defer svr.Close()
304+
svrURL, err := url.Parse(svr.URL)
305+
require.NoError(t, err)
306+
307+
uut := workspacesdk.NewWebsocketDialer(logger, svrURL, &websocket.DialOptions{})
308+
309+
_, err = uut.Dial(ctx, nil)
310+
require.Error(t, err)
311+
}
312+
273313
func TestWebsocketDialer_UplevelVersion(t *testing.T) {
274314
t.Parallel()
275315
ctx := testutil.Context(t, testutil.WaitShort)

0 commit comments

Comments
 (0)