-
Notifications
You must be signed in to change notification settings - Fork 902
feat(coderd/healthcheck): add health check for proxy #10846
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
a17ec4c
49b6e19
c7e202c
89bae7e
bcdc08c
340a276
d3478c8
c746021
05d4b09
56be002
1112b1c
fed35bd
7f8760a
5f5c486
b9cbdd8
ae30089
21f52bb
79dc190
2530320
f192488
b45e9c5
63fbafb
6de94bc
b57cdeb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,75 +2,136 @@ package healthcheck | |
|
||
import ( | ||
"context" | ||
"fmt" | ||
|
||
"golang.org/x/xerrors" | ||
|
||
"github.com/coder/coder/v2/buildinfo" | ||
"github.com/coder/coder/v2/coderd/healthcheck/health" | ||
"github.com/coder/coder/v2/coderd/util/ptr" | ||
"github.com/coder/coder/v2/codersdk" | ||
|
||
"github.com/hashicorp/go-multierror" | ||
) | ||
|
||
type WorkspaceProxyReportOptions struct { | ||
// CurrentVersion is the current server version. | ||
// We pass this in to make it easier to test. | ||
CurrentVersion string | ||
// FetchWorkspaceProxies is a function that returns the available workspace proxies. | ||
FetchWorkspaceProxies func(context.Context) (codersdk.RegionsResponse[codersdk.WorkspaceProxy], error) | ||
FetchWorkspaceProxies *func(context.Context) (codersdk.RegionsResponse[codersdk.WorkspaceProxy], error) | ||
// UpdateProxyHealth is a function called when healthcheck is run. | ||
// This would normally be ProxyHealth.ForceUpdate(). | ||
// We do this because if someone mashes the healthcheck refresh button | ||
// they would expect up-to-date data. | ||
UpdateProxyHealth func(context.Context) error | ||
UpdateProxyHealth *func(context.Context) error | ||
} | ||
|
||
// @typescript-generate Report | ||
type WorkspaceProxyReport struct { | ||
Healthy bool `json:"healthy"` | ||
Warnings []string `json:"warnings"` | ||
Error *string `json:"error"` | ||
Healthy bool `json:"healthy"` | ||
Severity health.Severity `json:"severity"` | ||
Warnings []string `json:"warnings"` | ||
Error *string `json:"error"` | ||
|
||
WorkspaceProxies codersdk.RegionsResponse[codersdk.WorkspaceProxy] | ||
} | ||
|
||
func (r *WorkspaceProxyReport) Run(ctx context.Context, opts *WorkspaceProxyReportOptions) { | ||
r.Healthy = true | ||
r.Severity = health.SeverityOK | ||
r.Warnings = []string{} | ||
|
||
if opts.FetchWorkspaceProxies == nil { | ||
return | ||
} | ||
fetchWorkspaceProxiesFunc := *opts.FetchWorkspaceProxies | ||
|
||
if opts.UpdateProxyHealth == nil { | ||
err := "opts.UpdateProxyHealth must not be nil if opts.FetchWorkspaceProxies is not nil" | ||
r.Error = ptr.Ref(err) | ||
return | ||
} | ||
updateProxyHealthFunc := *opts.UpdateProxyHealth | ||
|
||
if err := opts.UpdateProxyHealth(ctx); err != nil { | ||
r.Error = ptr.Ref(err.Error()) | ||
// If this fails, just mark it as a warning. It is still updated in the background. | ||
if err := updateProxyHealthFunc(ctx); err != nil { | ||
r.Severity = health.SeverityWarning | ||
r.Warnings = append(r.Warnings, xerrors.Errorf("update proxy health: %w", err).Error()) | ||
return | ||
} | ||
|
||
proxies, err := opts.FetchWorkspaceProxies(ctx) | ||
proxies, err := fetchWorkspaceProxiesFunc(ctx) | ||
if err != nil { | ||
r.Healthy = false | ||
r.Severity = health.SeverityError | ||
r.Error = ptr.Ref(err.Error()) | ||
return | ||
} | ||
|
||
r.WorkspaceProxies = proxies | ||
|
||
var numProxies int | ||
var healthyProxies int | ||
var total, healthy int | ||
for _, proxy := range r.WorkspaceProxies.Regions { | ||
numProxies++ | ||
total++ | ||
if proxy.Healthy { | ||
healthyProxies++ | ||
healthy++ | ||
} | ||
|
||
if len(proxy.Status.Report.Errors) > 0 { | ||
for _, err := range proxy.Status.Report.Errors { | ||
r.appendError(xerrors.New(err)) | ||
} | ||
} | ||
} | ||
|
||
r.Severity = calculateSeverity(total, healthy) | ||
r.Healthy = r.Severity != health.SeverityError | ||
|
||
// check versions | ||
if !buildinfo.VersionsMatch(proxy.Version, opts.CurrentVersion) { | ||
// Versions _must_ match. Perform this check last. This will clobber any other severity. | ||
for _, proxy := range r.WorkspaceProxies.Regions { | ||
if vErr := checkVersion(proxy, opts.CurrentVersion); vErr != nil { | ||
r.Healthy = false | ||
r.Warnings = append(r.Warnings, fmt.Sprintf("Proxy %q version %q does not match primary server version %q", proxy.Name, proxy.Version, opts.CurrentVersion)) | ||
r.Severity = health.SeverityError | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not suggesting, just asking: Is this always an error? Could it be a warning instead if it's a patch version difference, for instance? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Our current behaviour won't even let workspace proxies register if they don't match major.minor, but makes no further complaints if it's just a patch version difference. Making it warn on a patch version difference sounds like a good idea in theory, but it will mean every time you upgrade, you'll get a short period of warning that there's a patch version diff. Although that's the current behaviour as well when you do a major version upgrade, so... 🤷 I'll see about adding it in a follow-up. |
||
r.appendError(vErr) | ||
} | ||
} | ||
} | ||
|
||
// appendError multierror-appends err onto r.Error. | ||
// We only have one error, so multiple errors need to be squashed in there. | ||
func (r *WorkspaceProxyReport) appendError(errs ...error) { | ||
var prevErr error | ||
if r.Error != nil { | ||
prevErr = xerrors.New(*r.Error) | ||
} | ||
r.Error = ptr.Ref(multierror.Append(prevErr, errs...).Error()) | ||
johnstcn marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
func checkVersion(proxy codersdk.WorkspaceProxy, currentVersion string) error { | ||
if proxy.Version == "" { | ||
return nil // may have not connected yet, this is OK | ||
} | ||
if buildinfo.VersionsMatch(proxy.Version, currentVersion) { | ||
return nil | ||
} | ||
|
||
return xerrors.Errorf("proxy %q version %q does not match primary server version %q", | ||
proxy.Name, | ||
proxy.Version, | ||
currentVersion, | ||
) | ||
} | ||
|
||
// calculateSeverity returns: | ||
// health.SeverityError if all proxies are unhealthy, | ||
// health.SeverityOK if all proxies are healthy, | ||
// health.SeverityWarning otherwise. | ||
func calculateSeverity(total, healthy int) health.Severity { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. TBH this could probably be moved to a separate |
||
if total == 0 || total == healthy { | ||
return health.SeverityOK | ||
} | ||
if total-healthy == total { | ||
return health.SeverityError | ||
} | ||
return health.SeverityWarning | ||
} |
Uh oh!
There was an error while loading. Please reload this page.