diff --git a/coderd/database/dump.sql b/coderd/database/dump.sql index 7bea770248310..859cdd4b9ce6c 100644 --- a/coderd/database/dump.sql +++ b/coderd/database/dump.sql @@ -942,13 +942,16 @@ CREATE TABLE external_auth_links ( oauth_expiry timestamp with time zone NOT NULL, oauth_access_token_key_id text, oauth_refresh_token_key_id text, - oauth_extra jsonb + oauth_extra jsonb, + oauth_refresh_failure_reason text DEFAULT ''::text NOT NULL ); COMMENT ON COLUMN external_auth_links.oauth_access_token_key_id IS 'The ID of the key used to encrypt the OAuth access token. If this is NULL, the access token is not encrypted'; COMMENT ON COLUMN external_auth_links.oauth_refresh_token_key_id IS 'The ID of the key used to encrypt the OAuth refresh token. If this is NULL, the refresh token is not encrypted'; +COMMENT ON COLUMN external_auth_links.oauth_refresh_failure_reason IS 'This error means the refresh token is invalid. Cached so we can avoid calling the external provider again for the same error.'; + CREATE TABLE files ( hash character varying(64) NOT NULL, created_at timestamp with time zone NOT NULL, diff --git a/coderd/database/migrations/000358_failed_ext_auth_error.down.sql b/coderd/database/migrations/000358_failed_ext_auth_error.down.sql new file mode 100644 index 0000000000000..72cad82d36a1e --- /dev/null +++ b/coderd/database/migrations/000358_failed_ext_auth_error.down.sql @@ -0,0 +1,3 @@ +ALTER TABLE external_auth_links + DROP COLUMN oauth_refresh_failure_reason +; diff --git a/coderd/database/migrations/000358_failed_ext_auth_error.up.sql b/coderd/database/migrations/000358_failed_ext_auth_error.up.sql new file mode 100644 index 0000000000000..f2030ecbeeca2 --- /dev/null +++ b/coderd/database/migrations/000358_failed_ext_auth_error.up.sql @@ -0,0 +1,7 @@ +ALTER TABLE external_auth_links + ADD COLUMN oauth_refresh_failure_reason TEXT NOT NULL DEFAULT '' +; + +COMMENT ON COLUMN external_auth_links.oauth_refresh_failure_reason IS + 'This error means the refresh token is invalid. Cached so we can avoid calling the external provider again for the same error.' +; diff --git a/coderd/database/models.go b/coderd/database/models.go index 75d2b941dab3c..057d7956e5bbd 100644 --- a/coderd/database/models.go +++ b/coderd/database/models.go @@ -3065,6 +3065,8 @@ type ExternalAuthLink struct { // The ID of the key used to encrypt the OAuth refresh token. If this is NULL, the refresh token is not encrypted OAuthRefreshTokenKeyID sql.NullString `db:"oauth_refresh_token_key_id" json:"oauth_refresh_token_key_id"` OAuthExtra pqtype.NullRawMessage `db:"oauth_extra" json:"oauth_extra"` + // This error means the refresh token is invalid. Cached so we can avoid calling the external provider again for the same error. + OauthRefreshFailureReason string `db:"oauth_refresh_failure_reason" json:"oauth_refresh_failure_reason"` } type File struct { diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 58874cb7ed8c8..22aec98794fb3 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -1711,7 +1711,7 @@ func (q *sqlQuerier) DeleteExternalAuthLink(ctx context.Context, arg DeleteExter } const getExternalAuthLink = `-- name: GetExternalAuthLink :one -SELECT provider_id, user_id, created_at, updated_at, oauth_access_token, oauth_refresh_token, oauth_expiry, oauth_access_token_key_id, oauth_refresh_token_key_id, oauth_extra FROM external_auth_links WHERE provider_id = $1 AND user_id = $2 +SELECT provider_id, user_id, created_at, updated_at, oauth_access_token, oauth_refresh_token, oauth_expiry, oauth_access_token_key_id, oauth_refresh_token_key_id, oauth_extra, oauth_refresh_failure_reason FROM external_auth_links WHERE provider_id = $1 AND user_id = $2 ` type GetExternalAuthLinkParams struct { @@ -1733,12 +1733,13 @@ func (q *sqlQuerier) GetExternalAuthLink(ctx context.Context, arg GetExternalAut &i.OAuthAccessTokenKeyID, &i.OAuthRefreshTokenKeyID, &i.OAuthExtra, + &i.OauthRefreshFailureReason, ) return i, err } const getExternalAuthLinksByUserID = `-- name: GetExternalAuthLinksByUserID :many -SELECT provider_id, user_id, created_at, updated_at, oauth_access_token, oauth_refresh_token, oauth_expiry, oauth_access_token_key_id, oauth_refresh_token_key_id, oauth_extra FROM external_auth_links WHERE user_id = $1 +SELECT provider_id, user_id, created_at, updated_at, oauth_access_token, oauth_refresh_token, oauth_expiry, oauth_access_token_key_id, oauth_refresh_token_key_id, oauth_extra, oauth_refresh_failure_reason FROM external_auth_links WHERE user_id = $1 ` func (q *sqlQuerier) GetExternalAuthLinksByUserID(ctx context.Context, userID uuid.UUID) ([]ExternalAuthLink, error) { @@ -1761,6 +1762,7 @@ func (q *sqlQuerier) GetExternalAuthLinksByUserID(ctx context.Context, userID uu &i.OAuthAccessTokenKeyID, &i.OAuthRefreshTokenKeyID, &i.OAuthExtra, + &i.OauthRefreshFailureReason, ); err != nil { return nil, err } @@ -1798,7 +1800,7 @@ INSERT INTO external_auth_links ( $8, $9, $10 -) RETURNING provider_id, user_id, created_at, updated_at, oauth_access_token, oauth_refresh_token, oauth_expiry, oauth_access_token_key_id, oauth_refresh_token_key_id, oauth_extra +) RETURNING provider_id, user_id, created_at, updated_at, oauth_access_token, oauth_refresh_token, oauth_expiry, oauth_access_token_key_id, oauth_refresh_token_key_id, oauth_extra, oauth_refresh_failure_reason ` type InsertExternalAuthLinkParams struct { @@ -1839,6 +1841,7 @@ func (q *sqlQuerier) InsertExternalAuthLink(ctx context.Context, arg InsertExter &i.OAuthAccessTokenKeyID, &i.OAuthRefreshTokenKeyID, &i.OAuthExtra, + &i.OauthRefreshFailureReason, ) return i, err } @@ -1851,8 +1854,12 @@ UPDATE external_auth_links SET oauth_refresh_token = $6, oauth_refresh_token_key_id = $7, oauth_expiry = $8, - oauth_extra = $9 -WHERE provider_id = $1 AND user_id = $2 RETURNING provider_id, user_id, created_at, updated_at, oauth_access_token, oauth_refresh_token, oauth_expiry, oauth_access_token_key_id, oauth_refresh_token_key_id, oauth_extra + oauth_extra = $9, + -- Only 'UpdateExternalAuthLinkRefreshToken' supports updating the oauth_refresh_failure_reason. + -- Any updates to the external auth link, will be assumed to change the state and clear + -- any cached errors. + oauth_refresh_failure_reason = '' +WHERE provider_id = $1 AND user_id = $2 RETURNING provider_id, user_id, created_at, updated_at, oauth_access_token, oauth_refresh_token, oauth_expiry, oauth_access_token_key_id, oauth_refresh_token_key_id, oauth_extra, oauth_refresh_failure_reason ` type UpdateExternalAuthLinkParams struct { @@ -1891,6 +1898,7 @@ func (q *sqlQuerier) UpdateExternalAuthLink(ctx context.Context, arg UpdateExter &i.OAuthAccessTokenKeyID, &i.OAuthRefreshTokenKeyID, &i.OAuthExtra, + &i.OauthRefreshFailureReason, ) return i, err } @@ -1899,27 +1907,32 @@ const updateExternalAuthLinkRefreshToken = `-- name: UpdateExternalAuthLinkRefre UPDATE external_auth_links SET - oauth_refresh_token = $1, - updated_at = $2 + -- oauth_refresh_failure_reason can be set to cache the failure reason + -- for subsequent refresh attempts. + oauth_refresh_failure_reason = $1, + oauth_refresh_token = $2, + updated_at = $3 WHERE - provider_id = $3 + provider_id = $4 AND - user_id = $4 + user_id = $5 AND -- Required for sqlc to generate a parameter for the oauth_refresh_token_key_id - $5 :: text = $5 :: text + $6 :: text = $6 :: text ` type UpdateExternalAuthLinkRefreshTokenParams struct { - OAuthRefreshToken string `db:"oauth_refresh_token" json:"oauth_refresh_token"` - UpdatedAt time.Time `db:"updated_at" json:"updated_at"` - ProviderID string `db:"provider_id" json:"provider_id"` - UserID uuid.UUID `db:"user_id" json:"user_id"` - OAuthRefreshTokenKeyID string `db:"oauth_refresh_token_key_id" json:"oauth_refresh_token_key_id"` + OauthRefreshFailureReason string `db:"oauth_refresh_failure_reason" json:"oauth_refresh_failure_reason"` + OAuthRefreshToken string `db:"oauth_refresh_token" json:"oauth_refresh_token"` + UpdatedAt time.Time `db:"updated_at" json:"updated_at"` + ProviderID string `db:"provider_id" json:"provider_id"` + UserID uuid.UUID `db:"user_id" json:"user_id"` + OAuthRefreshTokenKeyID string `db:"oauth_refresh_token_key_id" json:"oauth_refresh_token_key_id"` } func (q *sqlQuerier) UpdateExternalAuthLinkRefreshToken(ctx context.Context, arg UpdateExternalAuthLinkRefreshTokenParams) error { _, err := q.db.ExecContext(ctx, updateExternalAuthLinkRefreshToken, + arg.OauthRefreshFailureReason, arg.OAuthRefreshToken, arg.UpdatedAt, arg.ProviderID, diff --git a/coderd/database/queries/externalauth.sql b/coderd/database/queries/externalauth.sql index 4368ce56589f0..9ca5cf6f871ad 100644 --- a/coderd/database/queries/externalauth.sql +++ b/coderd/database/queries/externalauth.sql @@ -40,13 +40,20 @@ UPDATE external_auth_links SET oauth_refresh_token = $6, oauth_refresh_token_key_id = $7, oauth_expiry = $8, - oauth_extra = $9 + oauth_extra = $9, + -- Only 'UpdateExternalAuthLinkRefreshToken' supports updating the oauth_refresh_failure_reason. + -- Any updates to the external auth link, will be assumed to change the state and clear + -- any cached errors. + oauth_refresh_failure_reason = '' WHERE provider_id = $1 AND user_id = $2 RETURNING *; -- name: UpdateExternalAuthLinkRefreshToken :exec UPDATE external_auth_links SET + -- oauth_refresh_failure_reason can be set to cache the failure reason + -- for subsequent refresh attempts. + oauth_refresh_failure_reason = @oauth_refresh_failure_reason, oauth_refresh_token = @oauth_refresh_token, updated_at = @updated_at WHERE diff --git a/coderd/externalauth/externalauth.go b/coderd/externalauth/externalauth.go index 9b8b87748e784..24ebe13d03074 100644 --- a/coderd/externalauth/externalauth.go +++ b/coderd/externalauth/externalauth.go @@ -14,6 +14,7 @@ import ( "strings" "time" + "github.com/dustin/go-humanize" "golang.org/x/oauth2" "golang.org/x/xerrors" @@ -28,6 +29,13 @@ import ( "github.com/coder/retry" ) +const ( + // failureReasonLimit is the maximum text length of an error to be cached to the + // database for a failed refresh token. In rare cases, the error could be a large + // HTML payload. + failureReasonLimit = 400 +) + // Config is used for authentication for Git operations. type Config struct { promoauth.InstrumentedOAuth2Config @@ -121,11 +129,12 @@ func (c *Config) RefreshToken(ctx context.Context, db database.Store, externalAu return externalAuthLink, InvalidTokenError("token expired, refreshing is either disabled or refreshing failed and will not be retried") } + refreshToken := externalAuthLink.OAuthRefreshToken + // This is additional defensive programming. Because TokenSource is an interface, // we cannot be sure that the implementation will treat an 'IsZero' time // as "not-expired". The default implementation does, but a custom implementation // might not. Removing the refreshToken will guarantee a refresh will fail. - refreshToken := externalAuthLink.OAuthRefreshToken if c.NoRefresh { refreshToken = "" } @@ -136,15 +145,30 @@ func (c *Config) RefreshToken(ctx context.Context, db database.Store, externalAu Expiry: externalAuthLink.OAuthExpiry, } + // Note: The TokenSource(...) method will make no remote HTTP requests if the + // token is expired and no refresh token is set. This is important to prevent + // spamming the API, consuming rate limits, when the token is known to fail. token, err := c.TokenSource(ctx, existingToken).Token() if err != nil { // TokenSource can fail for numerous reasons. If it fails because of // a bad refresh token, then the refresh token is invalid, and we should // get rid of it. Keeping it around will cause additional refresh // attempts that will fail and cost us api rate limits. + // + // The error message is saved for debugging purposes. if isFailedRefresh(existingToken, err) { + reason := err.Error() + if len(reason) > failureReasonLimit { + // Limit the length of the error message to prevent + // spamming the database with long error messages. + reason = reason[:failureReasonLimit] + } dbExecErr := db.UpdateExternalAuthLinkRefreshToken(ctx, database.UpdateExternalAuthLinkRefreshTokenParams{ - OAuthRefreshToken: "", // It is better to clear the refresh token than to keep retrying. + // Adding a reason will prevent further attempts to try and refresh the token. + OauthRefreshFailureReason: reason, + // Remove the invalid refresh token so it is never used again. The cached + // `reason` can be used to know why this field was zeroed out. + OAuthRefreshToken: "", OAuthRefreshTokenKeyID: externalAuthLink.OAuthRefreshTokenKeyID.String, UpdatedAt: dbtime.Now(), ProviderID: externalAuthLink.ProviderID, @@ -156,12 +180,28 @@ func (c *Config) RefreshToken(ctx context.Context, db database.Store, externalAu } // The refresh token was cleared externalAuthLink.OAuthRefreshToken = "" + externalAuthLink.UpdatedAt = dbtime.Now() } // Unfortunately have to match exactly on the error message string. // Improve the error message to account refresh tokens are deleted if // invalid on our end. + // + // This error messages comes from the oauth2 package on our client side. + // So this check is not against a server generated error message. + // Error source: https://github.com/golang/oauth2/blob/master/oauth2.go#L277 if err.Error() == "oauth2: token expired and refresh token is not set" { + if externalAuthLink.OauthRefreshFailureReason != "" { + // A cached refresh failure error exists. So the refresh token was set, but was invalid, and zeroed out. + // Return this cached error for the original refresh attempt. + return externalAuthLink, InvalidTokenError(fmt.Sprintf("token expired and refreshing failed %s with: %s", + // Do not return the exact time, because then we have to know what timezone the + // user is in. This approximate time is good enough. + humanize.Time(externalAuthLink.UpdatedAt), + externalAuthLink.OauthRefreshFailureReason, + )) + } + return externalAuthLink, InvalidTokenError("token expired, refreshing is either disabled or refreshing failed and will not be retried") } diff --git a/coderd/externalauth/externalauth_test.go b/coderd/externalauth/externalauth_test.go index 81cf5aa1f21e2..484d59beabb9b 100644 --- a/coderd/externalauth/externalauth_test.go +++ b/coderd/externalauth/externalauth_test.go @@ -177,19 +177,25 @@ func TestRefreshToken(t *testing.T) { link.OAuthExpiry = expired // Make the failure a server internal error. Not related to the token + // This should be retried since this error is temporary. refreshErr = &oauth2.RetrieveError{ Response: &http.Response{ StatusCode: http.StatusInternalServerError, }, ErrorCode: "internal_error", } - _, err := config.RefreshToken(ctx, mDB, link) - require.Error(t, err) - require.True(t, externalauth.IsInvalidTokenError(err)) - require.Equal(t, refreshCount, 1) + totalRefreshes := 0 + for i := 0; i < 3; i++ { + // Each loop will hit the temporary error and retry. + _, err := config.RefreshToken(ctx, mDB, link) + require.Error(t, err) + totalRefreshes++ + require.True(t, externalauth.IsInvalidTokenError(err)) + require.Equal(t, refreshCount, totalRefreshes) + } - // Try again with a bad refresh token error - // Expect DB call to remove the refresh token + // Try again with a bad refresh token error. This will invalidate the + // refresh token, and not retry again. Expect DB call to remove the refresh token mDB.EXPECT().UpdateExternalAuthLinkRefreshToken(gomock.Any(), gomock.Any()).Return(nil).Times(1) refreshErr = &oauth2.RetrieveError{ // github error Response: &http.Response{ @@ -197,17 +203,18 @@ func TestRefreshToken(t *testing.T) { }, ErrorCode: "bad_refresh_token", } - _, err = config.RefreshToken(ctx, mDB, link) + _, err := config.RefreshToken(ctx, mDB, link) require.Error(t, err) + totalRefreshes++ require.True(t, externalauth.IsInvalidTokenError(err)) - require.Equal(t, refreshCount, 2) + require.Equal(t, refreshCount, totalRefreshes) // When the refresh token is empty, no api calls should be made link.OAuthRefreshToken = "" // mock'd db, so manually set the token to '' _, err = config.RefreshToken(ctx, mDB, link) require.Error(t, err) require.True(t, externalauth.IsInvalidTokenError(err)) - require.Equal(t, refreshCount, 2) + require.Equal(t, refreshCount, totalRefreshes) }) // ValidateFailure tests if the token is no longer valid with a 401 response.