Skip to content

Commit 321c2b8

Browse files
authored
fix: fix flake in TestExecutorAutostartSkipsWhenNoProvisionersAvailable (coder#19478)
The flake here had two causes: 1. related to usage of time.Now() in MustWaitForProvisionersAvailable and 2. the fact that UpdateProvisionerLastSeenAt can not use a time that is further in the past than the current LastSeenAt time Previously the test here was calling `coderdtest.MustWaitForProvisionersAvailable` which was using `time.Now` rather than the next tick time like the real `hasProvisionersAvailable` function does. Additionally, when using `UpdateProvisionerLastSeenAt` the underlying db query enforces that the time we're trying to set `LastSeenAt` to cannot be older than the current value. I was able to reliably reproduce the flake by executing both the `UpdateProvisionerLastSeenAt` call and `tickCh <- next` in their own goroutines, the former with a small sleep to reliably ensure we'd trigger the autobuild before we set the `LastSeenAt` time. That's when I also noticed that `coderdtest.MustWaitForProvisionersAvailable` was using `time.Now` instead of the tick time. When I updated that function to take in a tick time + added a 2nd call to `UpdateProvisionerLastSeenAt` to set an original non-stale time, we could then never get the test to pass because the later call to set the stale time would not actually modify `LastSeenAt`. On top of that, calling the provisioner daemons closer in the middle of the function doesn't really do anything of value in this test. **The fix for the flake is to keep the go routines, ensuring there would be a flake if there was not a relevant fix, but to include the fix which is to ensure that we explicitly wait for the provisioner to be stale before passing the time to `tickCh`.** --------- Signed-off-by: Callum Styan <callumstyan@gmail.com>
1 parent 95dccf3 commit 321c2b8

File tree

3 files changed

+63
-20
lines changed

3 files changed

+63
-20
lines changed

coderd/autobuild/lifecycle_executor_test.go

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"database/sql"
66
"errors"
7+
"sync"
78
"testing"
89
"time"
910

@@ -1720,19 +1721,32 @@ func TestExecutorAutostartSkipsWhenNoProvisionersAvailable(t *testing.T) {
17201721
// Stop the workspace while provisioner is available
17211722
workspace = coderdtest.MustTransitionWorkspace(t, client, workspace.ID, codersdk.WorkspaceTransitionStart, codersdk.WorkspaceTransitionStop)
17221723

1723-
// Wait for provisioner to be available for this specific workspace
1724-
coderdtest.MustWaitForProvisionersAvailable(t, db, workspace)
17251724
p, err := coderdtest.GetProvisionerForTags(db, time.Now(), workspace.OrganizationID, provisionerDaemonTags)
17261725
require.NoError(t, err, "Error getting provisioner for workspace")
17271726

1728-
daemon1Closer.Close()
1727+
var wg sync.WaitGroup
1728+
wg.Add(2)
17291729

1730-
// Ensure the provisioner is stale
1731-
staleTime := sched.Next(workspace.LatestBuild.CreatedAt).Add((-1 * provisionerdserver.StaleInterval) + -10*time.Second)
1732-
coderdtest.UpdateProvisionerLastSeenAt(t, db, p.ID, staleTime)
1730+
next := sched.Next(workspace.LatestBuild.CreatedAt)
1731+
go func() {
1732+
defer wg.Done()
1733+
// Ensure the provisioner is stale
1734+
staleTime := next.Add(-(provisionerdserver.StaleInterval * 2))
1735+
coderdtest.UpdateProvisionerLastSeenAt(t, db, p.ID, staleTime)
1736+
p, err = coderdtest.GetProvisionerForTags(db, time.Now(), workspace.OrganizationID, provisionerDaemonTags)
1737+
assert.NoError(t, err, "Error getting provisioner for workspace")
1738+
assert.Eventually(t, func() bool { return p.LastSeenAt.Time.UnixNano() == staleTime.UnixNano() }, testutil.WaitMedium, testutil.IntervalFast)
1739+
}()
17331740

1734-
// Trigger autobuild
1735-
tickCh <- sched.Next(workspace.LatestBuild.CreatedAt)
1741+
go func() {
1742+
defer wg.Done()
1743+
// Ensure the provisioner is gone or stale before triggering the autobuild
1744+
coderdtest.MustWaitForProvisionersUnavailable(t, db, workspace, provisionerDaemonTags, next)
1745+
// Trigger autobuild
1746+
tickCh <- next
1747+
}()
1748+
1749+
wg.Wait()
17361750

17371751
stats := <-statsCh
17381752

@@ -1758,5 +1772,5 @@ func TestExecutorAutostartSkipsWhenNoProvisionersAvailable(t *testing.T) {
17581772
}()
17591773
stats = <-statsCh
17601774

1761-
assert.Len(t, stats.Transitions, 1, "should not create builds when no provisioners available")
1775+
assert.Len(t, stats.Transitions, 1, "should create builds when provisioners are available")
17621776
}

coderd/coderdtest/coderdtest.go

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1649,19 +1649,48 @@ func UpdateProvisionerLastSeenAt(t *testing.T, db database.Store, id uuid.UUID,
16491649
func MustWaitForAnyProvisioner(t *testing.T, db database.Store) {
16501650
t.Helper()
16511651
ctx := ctxWithProvisionerPermissions(testutil.Context(t, testutil.WaitShort))
1652-
require.Eventually(t, func() bool {
1652+
// testutil.Eventually(t, func)
1653+
testutil.Eventually(ctx, t, func(ctx context.Context) (done bool) {
16531654
daemons, err := db.GetProvisionerDaemons(ctx)
16541655
return err == nil && len(daemons) > 0
1655-
}, testutil.WaitShort, testutil.IntervalFast)
1656+
}, testutil.IntervalFast, "no provisioner daemons found")
1657+
}
1658+
1659+
// MustWaitForProvisionersUnavailable waits for provisioners to become unavailable for a specific workspace
1660+
func MustWaitForProvisionersUnavailable(t *testing.T, db database.Store, workspace codersdk.Workspace, tags map[string]string, checkTime time.Time) {
1661+
t.Helper()
1662+
ctx := ctxWithProvisionerPermissions(testutil.Context(t, testutil.WaitMedium))
1663+
1664+
testutil.Eventually(ctx, t, func(ctx context.Context) (done bool) {
1665+
// Use the same logic as hasValidProvisioner but expect false
1666+
provisionerDaemons, err := db.GetProvisionerDaemonsByOrganization(ctx, database.GetProvisionerDaemonsByOrganizationParams{
1667+
OrganizationID: workspace.OrganizationID,
1668+
WantTags: tags,
1669+
})
1670+
if err != nil {
1671+
return false
1672+
}
1673+
1674+
// Check if NO provisioners are active (all are stale or gone)
1675+
for _, pd := range provisionerDaemons {
1676+
if pd.LastSeenAt.Valid {
1677+
age := checkTime.Sub(pd.LastSeenAt.Time)
1678+
if age <= provisionerdserver.StaleInterval {
1679+
return false // Found an active provisioner, keep waiting
1680+
}
1681+
}
1682+
}
1683+
return true // No active provisioners found
1684+
}, testutil.IntervalFast, "there are still provisioners available for workspace, expected none")
16561685
}
16571686

16581687
// MustWaitForProvisionersAvailable waits for provisioners to be available for a specific workspace.
1659-
func MustWaitForProvisionersAvailable(t *testing.T, db database.Store, workspace codersdk.Workspace) uuid.UUID {
1688+
func MustWaitForProvisionersAvailable(t *testing.T, db database.Store, workspace codersdk.Workspace, ts time.Time) uuid.UUID {
16601689
t.Helper()
1661-
ctx := ctxWithProvisionerPermissions(testutil.Context(t, testutil.WaitShort))
1690+
ctx := ctxWithProvisionerPermissions(testutil.Context(t, testutil.WaitLong))
16621691
id := uuid.UUID{}
16631692
// Get the workspace from the database
1664-
require.Eventually(t, func() bool {
1693+
testutil.Eventually(ctx, t, func(ctx context.Context) (done bool) {
16651694
ws, err := db.GetWorkspaceByID(ctx, workspace.ID)
16661695
if err != nil {
16671696
return false
@@ -1689,18 +1718,17 @@ func MustWaitForProvisionersAvailable(t *testing.T, db database.Store, workspace
16891718
}
16901719

16911720
// Check if any provisioners are active (not stale)
1692-
now := time.Now()
16931721
for _, pd := range provisionerDaemons {
16941722
if pd.LastSeenAt.Valid {
1695-
age := now.Sub(pd.LastSeenAt.Time)
1723+
age := ts.Sub(pd.LastSeenAt.Time)
16961724
if age <= provisionerdserver.StaleInterval {
16971725
id = pd.ID
16981726
return true // Found an active provisioner
16991727
}
17001728
}
17011729
}
17021730
return false // No active provisioners found
1703-
}, testutil.WaitLong, testutil.IntervalFast)
1731+
}, testutil.IntervalFast, "no active provisioners available for workspace, expected at least one (non-stale)")
17041732

17051733
return id
17061734
}

enterprise/coderd/workspaces_test.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2242,13 +2242,14 @@ func TestPrebuildsAutobuild(t *testing.T) {
22422242
workspace = coderdtest.MustTransitionWorkspace(t, client, workspace.ID, codersdk.WorkspaceTransitionStart, codersdk.WorkspaceTransitionStop)
22432243
coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, workspace.LatestBuild.ID)
22442244

2245+
p, err := coderdtest.GetProvisionerForTags(db, time.Now(), workspace.OrganizationID, nil)
2246+
coderdtest.UpdateProvisionerLastSeenAt(t, db, p.ID, sched.Next(prebuild.LatestBuild.CreatedAt))
2247+
22452248
// Wait for provisioner to be available for this specific workspace
2246-
coderdtest.MustWaitForProvisionersAvailable(t, db, prebuild)
2249+
coderdtest.MustWaitForProvisionersAvailable(t, db, prebuild, sched.Next(prebuild.LatestBuild.CreatedAt))
22472250

22482251
tickTime := sched.Next(prebuild.LatestBuild.CreatedAt).Add(time.Minute)
2249-
p, err := coderdtest.GetProvisionerForTags(db, time.Now(), workspace.OrganizationID, nil)
22502252
require.NoError(t, err)
2251-
coderdtest.UpdateProvisionerLastSeenAt(t, db, p.ID, tickTime)
22522253

22532254
// Tick at the next scheduled time after the prebuild’s LatestBuild.CreatedAt,
22542255
// since the next allowed autostart is calculated starting from that point.

0 commit comments

Comments
 (0)