@@ -2,17 +2,18 @@ package executor
2
2
3
3
import (
4
4
"context"
5
+ "database/sql"
5
6
"encoding/json"
6
7
"time"
7
8
8
- "cdr.dev/slog"
9
-
10
- "github.com/coder/coder/coderd/autobuild/schedule"
11
- "github.com/coder/coder/coderd/database"
12
-
13
9
"github.com/google/uuid"
14
10
"github.com/moby/moby/pkg/namesgenerator"
11
+ "golang.org/x/sync/errgroup"
15
12
"golang.org/x/xerrors"
13
+
14
+ "cdr.dev/slog"
15
+ "github.com/coder/coder/coderd/autobuild/schedule"
16
+ "github.com/coder/coder/coderd/database"
16
17
)
17
18
18
19
// Executor automatically starts or stops workspaces.
@@ -89,77 +90,103 @@ func (e *Executor) runOnce(t time.Time) Stats {
89
90
stats .Error = err
90
91
}()
91
92
currentTick := t .Truncate (time .Minute )
92
- err = e .db .InTx (func (db database.Store ) error {
93
- // TTL is set at the workspace level, and deadline at the workspace build level.
94
- // When a workspace build is created, its deadline initially starts at zero.
95
- // When provisionerd successfully completes a provision job, the deadline is
96
- // set to now + TTL if the associated workspace has a TTL set. This deadline
97
- // is what we compare against when performing autostop operations, rounded down
98
- // to the minute.
99
- //
100
- // NOTE: If a workspace build is created with a given TTL and then the user either
101
- // changes or unsets the TTL, the deadline for the workspace build will not
102
- // have changed. This behavior is as expected per #2229.
103
- eligibleWorkspaces , err := db .GetWorkspacesAutostart (e .ctx )
104
- if err != nil {
105
- return xerrors .Errorf ("get eligible workspaces for autostart or autostop: %w" , err )
106
- }
107
93
108
- for _ , ws := range eligibleWorkspaces {
109
- // Determine the workspace state based on its latest build.
110
- priorHistory , err := db .GetLatestWorkspaceBuildByWorkspaceID (e .ctx , ws .ID )
111
- if err != nil {
112
- e .log .Warn (e .ctx , "get latest workspace build" ,
113
- slog .F ("workspace_id" , ws .ID ),
114
- slog .Error (err ),
115
- )
116
- continue
117
- }
94
+ // TTL is set at the workspace level, and deadline at the workspace build level.
95
+ // When a workspace build is created, its deadline initially starts at zero.
96
+ // When provisionerd successfully completes a provision job, the deadline is
97
+ // set to now + TTL if the associated workspace has a TTL set. This deadline
98
+ // is what we compare against when performing autostop operations, rounded down
99
+ // to the minute.
100
+ //
101
+ // NOTE: If a workspace build is created with a given TTL and then the user either
102
+ // changes or unsets the TTL, the deadline for the workspace build will not
103
+ // have changed. This behavior is as expected per #2229.
104
+ eligibleWorkspaces , err := e .db .GetWorkspacesAutostart (e .ctx )
105
+ if err != nil {
106
+ e .log .Error (e .ctx , "get eligible workspaces for autostart or autostop" , slog .Error (err ))
107
+ }
118
108
119
- priorJob , err := db .GetProvisionerJobByID (e .ctx , priorHistory .JobID )
120
- if err != nil {
121
- e .log .Warn (e .ctx , "get last provisioner job for workspace %q: %w" ,
122
- slog .F ("workspace_id" , ws .ID ),
123
- slog .Error (err ),
124
- )
125
- continue
126
- }
109
+ // We only use errgroup here for convenience of API, not for early
110
+ // cancellation. This means we only return nil errors from the eg.Go callbacks.
111
+ eg := errgroup.Group {}
112
+ // Limit the concurrency to avoid overloading the database.
113
+ eg .SetLimit (10 )
127
114
128
- validTransition , nextTransition , err := getNextTransition (ws , priorHistory , priorJob )
129
- if err != nil {
130
- e .log .Debug (e .ctx , "skipping workspace" ,
131
- slog .Error (err ),
132
- slog .F ("workspace_id" , ws .ID ),
133
- )
134
- continue
135
- }
115
+ for _ , ws := range eligibleWorkspaces {
116
+ ws := ws
117
+ log := e .log .With (slog .F ("workspace_id" , ws .ID ))
136
118
137
- if currentTick .Before (nextTransition ) {
138
- e .log .Debug (e .ctx , "skipping workspace: too early" ,
139
- slog .F ("workspace_id" , ws .ID ),
140
- slog .F ("next_transition_at" , nextTransition ),
141
- slog .F ("transition" , validTransition ),
142
- slog .F ("current_tick" , currentTick ),
143
- )
144
- continue
145
- }
119
+ eg .Go (func () error {
120
+ err := e .db .InTx (func (db database.Store ) error {
121
+ var err error
146
122
147
- e .log .Info (e .ctx , "scheduling workspace transition" ,
148
- slog .F ("workspace_id" , ws .ID ),
149
- slog .F ("transition" , validTransition ),
150
- )
123
+ // Re-check eligibility since the first check was outside the
124
+ // transaction and the workspace settings may have changed.
125
+ ws , err = db .GetWorkspaceAutostart (e .ctx , ws .ID )
126
+ if err != nil {
127
+ // Receiving ErrNoRows means the workspace settings changed
128
+ // and it is no longer eligible for autostart. Other errors
129
+ // mean something went wrong.
130
+ if ! xerrors .Is (err , sql .ErrNoRows ) {
131
+ log .Error (e .ctx , "get workspace autostart failed" , slog .Error (err ))
132
+ }
133
+ return nil
134
+ }
135
+
136
+ // Determine the workspace state based on its latest build.
137
+ priorHistory , err := db .GetLatestWorkspaceBuildByWorkspaceID (e .ctx , ws .ID )
138
+ if err != nil {
139
+ log .Warn (e .ctx , "get latest workspace build" , slog .Error (err ))
140
+ return nil
141
+ }
142
+
143
+ priorJob , err := db .GetProvisionerJobByID (e .ctx , priorHistory .JobID )
144
+ if err != nil {
145
+ log .Warn (e .ctx , "get last provisioner job for workspace %q: %w" , slog .Error (err ))
146
+ return nil
147
+ }
148
+
149
+ validTransition , nextTransition , err := getNextTransition (ws , priorHistory , priorJob )
150
+ if err != nil {
151
+ log .Debug (e .ctx , "skipping workspace" , slog .Error (err ))
152
+ return nil
153
+ }
154
+
155
+ if currentTick .Before (nextTransition ) {
156
+ log .Debug (e .ctx , "skipping workspace: too early" ,
157
+ slog .F ("next_transition_at" , nextTransition ),
158
+ slog .F ("transition" , validTransition ),
159
+ slog .F ("current_tick" , currentTick ),
160
+ )
161
+ return nil
162
+ }
163
+
164
+ log .Info (e .ctx , "scheduling workspace transition" , slog .F ("transition" , validTransition ))
151
165
152
- stats .Transitions [ws .ID ] = validTransition
153
- if err := build (e .ctx , db , ws , validTransition , priorHistory , priorJob ); err != nil {
154
- e .log .Error (e .ctx , "unable to transition workspace" ,
155
- slog .F ("workspace_id" , ws .ID ),
156
- slog .F ("transition" , validTransition ),
157
- slog .Error (err ),
158
- )
166
+ stats .Transitions [ws .ID ] = validTransition
167
+ if err := build (e .ctx , db , ws , validTransition , priorHistory , priorJob ); err != nil {
168
+ log .Error (e .ctx , "unable to transition workspace" ,
169
+ slog .F ("transition" , validTransition ),
170
+ slog .Error (err ),
171
+ )
172
+ return nil
173
+ }
174
+
175
+ return nil
176
+ })
177
+ if err != nil {
178
+ log .Error (e .ctx , "workspace scheduling failed" , slog .Error (err ))
159
179
}
160
- }
161
- return nil
162
- })
180
+ return nil
181
+ })
182
+ }
183
+
184
+ // This should not happen since we don't want early cancellation.
185
+ err = eg .Wait ()
186
+ if err != nil {
187
+ e .log .Error (e .ctx , "workspace scheduling errgroup failed" , slog .Error (err ))
188
+ }
189
+
163
190
return stats
164
191
}
165
192
0 commit comments