Skip to content

Commit 7028377

Browse files
committed
add retry loop
1 parent 84c3cf8 commit 7028377

File tree

1 file changed

+102
-63
lines changed

1 file changed

+102
-63
lines changed

agent/apphealth.go

Lines changed: 102 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -2,94 +2,133 @@ package agent
22

33
import (
44
"context"
5+
"net/http"
6+
"net/url"
57
"sync"
68
"time"
79

10+
"golang.org/x/xerrors"
11+
812
"cdr.dev/slog"
913
"github.com/coder/coder/codersdk"
14+
"github.com/coder/retry"
1015
)
1116

12-
func reportAppHealth(ctx context.Context, logger slog.Logger, fetcher FetchWorkspaceApps, reporter PostWorkspaceAppHealth) {
13-
apps, err := fetcher(ctx)
14-
if err != nil {
15-
logger.Error(ctx, "failed to fetch workspace apps", slog.Error(err))
16-
return
17-
}
17+
func reportAppHealth(ctx context.Context, logger slog.Logger, fetchApps FetchWorkspaceApps, reportHealth PostWorkspaceAppHealth) {
18+
r := retry.New(time.Second, 30*time.Second)
19+
for {
20+
err := func() error {
21+
apps, err := fetchApps(ctx)
22+
if err != nil {
23+
return xerrors.Errorf("getting workspace apps: %w", err)
24+
}
1825

19-
if len(apps) == 0 {
20-
return
21-
}
26+
if len(apps) == 0 {
27+
return nil
28+
}
2229

23-
health := make(map[string]codersdk.WorkspaceAppHealth, 0)
24-
for _, app := range apps {
25-
health[app.Name] = app.Health
26-
}
30+
health := make(map[string]codersdk.WorkspaceAppHealth, 0)
31+
for _, app := range apps {
32+
health[app.Name] = app.Health
33+
}
2734

28-
tickers := make(chan string, 0)
29-
for _, app := range apps {
30-
if shouldStartTicker(app) {
31-
t := time.NewTicker(time.Duration(app.HealthcheckInterval) * time.Second)
35+
tickers := make(chan string, 0)
36+
for _, app := range apps {
37+
if shouldStartTicker(app) {
38+
t := time.NewTicker(time.Duration(app.HealthcheckInterval) * time.Second)
39+
go func() {
40+
for {
41+
select {
42+
case <-ctx.Done():
43+
return
44+
case <-t.C:
45+
tickers <- app.Name
46+
}
47+
}
48+
}()
49+
}
50+
}
51+
var mu sync.RWMutex
52+
var failures map[string]int
3253
go func() {
3354
for {
3455
select {
3556
case <-ctx.Done():
3657
return
37-
case <-t.C:
38-
tickers <- app.Name
58+
case name := <-tickers:
59+
for _, app := range apps {
60+
if app.Name != name {
61+
continue
62+
}
63+
64+
client := &http.Client{
65+
Timeout: time.Duration(app.HealthcheckInterval),
66+
}
67+
err := func() error {
68+
u, err := url.Parse(app.HealthcheckURL)
69+
if err != nil {
70+
return err
71+
}
72+
res, err := client.Do(&http.Request{
73+
Method: http.MethodGet,
74+
URL: u,
75+
})
76+
if err != nil {
77+
return err
78+
}
79+
res.Body.Close()
80+
if res.StatusCode > 499 {
81+
return xerrors.Errorf("error status code: %d", res.StatusCode)
82+
}
83+
84+
return nil
85+
}()
86+
if err == nil {
87+
mu.Lock()
88+
failures[app.Name]++
89+
if failures[app.Name] > int(app.HealthcheckThreshold) {
90+
health[app.Name] = codersdk.WorkspaceAppHealthUnhealthy
91+
}
92+
mu.Unlock()
93+
} else {
94+
mu.Lock()
95+
failures[app.Name] = 0
96+
health[app.Name] = codersdk.WorkspaceAppHealthHealthy
97+
mu.Unlock()
98+
}
99+
}
39100
}
40101
}
41102
}()
42-
}
43-
}
44-
var mu sync.RWMutex
45-
var failures map[string]int
46-
go func() {
47-
for {
48-
select {
49-
case <-ctx.Done():
50-
return
51-
case name := <-tickers:
52-
for _, app := range apps {
53-
if app.Name != name {
54-
continue
55-
}
56103

57-
func() {
58-
// do curl
59-
var err error
104+
reportTicker := time.NewTicker(time.Second)
105+
lastHealth := make(map[string]codersdk.WorkspaceAppHealth, 0)
106+
for {
107+
select {
108+
case <-ctx.Done():
109+
return nil
110+
case <-reportTicker.C:
111+
mu.RLock()
112+
changed := healthChanged(lastHealth, health)
113+
mu.Unlock()
114+
if changed {
115+
lastHealth = health
116+
err := reportHealth(ctx, health)
60117
if err != nil {
61-
mu.Lock()
62-
failures[app.Name]++
63-
mu.Unlock()
64-
return
118+
logger.Error(ctx, "failed to report workspace app stat", slog.Error(err))
65119
}
66-
mu.Lock()
67-
failures[app.Name] = 0
68-
mu.Unlock()
69-
}()
120+
}
70121
}
71122
}
123+
}()
124+
if err != nil {
125+
logger.Error(ctx, "failed running workspace app reporter", slog.Error(err))
126+
// continue loop with backoff on non-nil errors
127+
r.Wait(ctx)
128+
continue
72129
}
73-
}()
74130

75-
reportTicker := time.NewTicker(time.Second)
76-
lastHealth := make(map[string]codersdk.WorkspaceAppHealth, 0)
77-
for {
78-
select {
79-
case <-ctx.Done():
80-
return
81-
case <-reportTicker.C:
82-
mu.RLock()
83-
changed := healthChanged(lastHealth, health)
84-
mu.Unlock()
85-
if changed {
86-
lastHealth = health
87-
err := reporter(ctx, health)
88-
if err != nil {
89-
logger.Error(ctx, "failed to report workspace app stat", slog.Error(err))
90-
}
91-
}
92-
}
131+
return
93132
}
94133
}
95134

0 commit comments

Comments
 (0)