@@ -2,94 +2,133 @@ package agent
2
2
3
3
import (
4
4
"context"
5
+ "net/http"
6
+ "net/url"
5
7
"sync"
6
8
"time"
7
9
10
+ "golang.org/x/xerrors"
11
+
8
12
"cdr.dev/slog"
9
13
"github.com/coder/coder/codersdk"
14
+ "github.com/coder/retry"
10
15
)
11
16
12
- func reportAppHealth (ctx context.Context , logger slog.Logger , fetcher FetchWorkspaceApps , reporter PostWorkspaceAppHealth ) {
13
- apps , err := fetcher (ctx )
14
- if err != nil {
15
- logger .Error (ctx , "failed to fetch workspace apps" , slog .Error (err ))
16
- return
17
- }
17
+ func reportAppHealth (ctx context.Context , logger slog.Logger , fetchApps FetchWorkspaceApps , reportHealth PostWorkspaceAppHealth ) {
18
+ r := retry .New (time .Second , 30 * time .Second )
19
+ for {
20
+ err := func () error {
21
+ apps , err := fetchApps (ctx )
22
+ if err != nil {
23
+ return xerrors .Errorf ("getting workspace apps: %w" , err )
24
+ }
18
25
19
- if len (apps ) == 0 {
20
- return
21
- }
26
+ if len (apps ) == 0 {
27
+ return nil
28
+ }
22
29
23
- health := make (map [string ]codersdk.WorkspaceAppHealth , 0 )
24
- for _ , app := range apps {
25
- health [app .Name ] = app .Health
26
- }
30
+ health := make (map [string ]codersdk.WorkspaceAppHealth , 0 )
31
+ for _ , app := range apps {
32
+ health [app .Name ] = app .Health
33
+ }
27
34
28
- tickers := make (chan string , 0 )
29
- for _ , app := range apps {
30
- if shouldStartTicker (app ) {
31
- t := time .NewTicker (time .Duration (app .HealthcheckInterval ) * time .Second )
35
+ tickers := make (chan string , 0 )
36
+ for _ , app := range apps {
37
+ if shouldStartTicker (app ) {
38
+ t := time .NewTicker (time .Duration (app .HealthcheckInterval ) * time .Second )
39
+ go func () {
40
+ for {
41
+ select {
42
+ case <- ctx .Done ():
43
+ return
44
+ case <- t .C :
45
+ tickers <- app .Name
46
+ }
47
+ }
48
+ }()
49
+ }
50
+ }
51
+ var mu sync.RWMutex
52
+ var failures map [string ]int
32
53
go func () {
33
54
for {
34
55
select {
35
56
case <- ctx .Done ():
36
57
return
37
- case <- t .C :
38
- tickers <- app .Name
58
+ case name := <- tickers :
59
+ for _ , app := range apps {
60
+ if app .Name != name {
61
+ continue
62
+ }
63
+
64
+ client := & http.Client {
65
+ Timeout : time .Duration (app .HealthcheckInterval ),
66
+ }
67
+ err := func () error {
68
+ u , err := url .Parse (app .HealthcheckURL )
69
+ if err != nil {
70
+ return err
71
+ }
72
+ res , err := client .Do (& http.Request {
73
+ Method : http .MethodGet ,
74
+ URL : u ,
75
+ })
76
+ if err != nil {
77
+ return err
78
+ }
79
+ res .Body .Close ()
80
+ if res .StatusCode > 499 {
81
+ return xerrors .Errorf ("error status code: %d" , res .StatusCode )
82
+ }
83
+
84
+ return nil
85
+ }()
86
+ if err == nil {
87
+ mu .Lock ()
88
+ failures [app .Name ]++
89
+ if failures [app .Name ] > int (app .HealthcheckThreshold ) {
90
+ health [app .Name ] = codersdk .WorkspaceAppHealthUnhealthy
91
+ }
92
+ mu .Unlock ()
93
+ } else {
94
+ mu .Lock ()
95
+ failures [app .Name ] = 0
96
+ health [app .Name ] = codersdk .WorkspaceAppHealthHealthy
97
+ mu .Unlock ()
98
+ }
99
+ }
39
100
}
40
101
}
41
102
}()
42
- }
43
- }
44
- var mu sync.RWMutex
45
- var failures map [string ]int
46
- go func () {
47
- for {
48
- select {
49
- case <- ctx .Done ():
50
- return
51
- case name := <- tickers :
52
- for _ , app := range apps {
53
- if app .Name != name {
54
- continue
55
- }
56
103
57
- func () {
58
- // do curl
59
- var err error
104
+ reportTicker := time .NewTicker (time .Second )
105
+ lastHealth := make (map [string ]codersdk.WorkspaceAppHealth , 0 )
106
+ for {
107
+ select {
108
+ case <- ctx .Done ():
109
+ return nil
110
+ case <- reportTicker .C :
111
+ mu .RLock ()
112
+ changed := healthChanged (lastHealth , health )
113
+ mu .Unlock ()
114
+ if changed {
115
+ lastHealth = health
116
+ err := reportHealth (ctx , health )
60
117
if err != nil {
61
- mu .Lock ()
62
- failures [app .Name ]++
63
- mu .Unlock ()
64
- return
118
+ logger .Error (ctx , "failed to report workspace app stat" , slog .Error (err ))
65
119
}
66
- mu .Lock ()
67
- failures [app .Name ] = 0
68
- mu .Unlock ()
69
- }()
120
+ }
70
121
}
71
122
}
123
+ }()
124
+ if err != nil {
125
+ logger .Error (ctx , "failed running workspace app reporter" , slog .Error (err ))
126
+ // continue loop with backoff on non-nil errors
127
+ r .Wait (ctx )
128
+ continue
72
129
}
73
- }()
74
130
75
- reportTicker := time .NewTicker (time .Second )
76
- lastHealth := make (map [string ]codersdk.WorkspaceAppHealth , 0 )
77
- for {
78
- select {
79
- case <- ctx .Done ():
80
- return
81
- case <- reportTicker .C :
82
- mu .RLock ()
83
- changed := healthChanged (lastHealth , health )
84
- mu .Unlock ()
85
- if changed {
86
- lastHealth = health
87
- err := reporter (ctx , health )
88
- if err != nil {
89
- logger .Error (ctx , "failed to report workspace app stat" , slog .Error (err ))
90
- }
91
- }
92
- }
131
+ return
93
132
}
94
133
}
95
134
0 commit comments