Skip to content

chore: skip timing-sensistive AgentMetadata test in the standard suite #7237

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 2, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
chore: skip timing-sensistive AgentMetadata test in the standard suite
  • Loading branch information
ammario committed Apr 21, 2023
commit ca0125e6acea2cb0239ac5effca4dc1f4bb802aa
141 changes: 88 additions & 53 deletions agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1011,78 +1011,113 @@ func TestAgent_Metadata(t *testing.T) {
})

t.Run("Many", func(t *testing.T) {
t.Parallel()
script := "echo -n hello"
if runtime.GOOS == "windows" {
// Shell scripting in Windows is a pain, and we have already tested
// that the OS logic works in the simpler "Once" test above.
t.Skip()
script = "powershell " + script
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't seem to be working, CI failing with missing hello. Does it need quoting?

}
t.Parallel()

dir := t.TempDir()

const reportInterval = 2
const intervalUnit = 100 * time.Millisecond
var (
greetingPath = filepath.Join(dir, "greeting")
script = "echo hello | tee -a " + greetingPath
)
//nolint:dogsled
_, client, _, _, _ := setupAgent(t, agentsdk.Manifest{
Metadata: []codersdk.WorkspaceAgentMetadataDescription{
{
Key: "greeting",
Interval: reportInterval,
Interval: 1,
Script: script,
},
{
Key: "bad",
Interval: reportInterval,
Script: "exit 1",
},
},
}, 0)

var gotMd map[string]agentsdk.PostMetadataRequest
require.Eventually(t, func() bool {
return len(client.getMetadata()) == 2
gotMd = client.getMetadata()
return len(gotMd) == 1
}, testutil.WaitShort, testutil.IntervalMedium)

for start := time.Now(); time.Since(start) < testutil.WaitMedium; time.Sleep(testutil.IntervalMedium) {
md := client.getMetadata()
if len(md) != 2 {
panic("unexpected number of metadata entries")
}
collectedAt1 := gotMd["greeting"].CollectedAt
assert.Equal(t, "hello", gotMd["greeting"].Value)

require.Equal(t, "hello\n", md["greeting"].Value)
require.Equal(t, "exit status 1", md["bad"].Error)
if !assert.Eventually(t, func() bool {
gotMd = client.getMetadata()
return gotMd["greeting"].CollectedAt.After(collectedAt1)
}, testutil.WaitShort, testutil.IntervalMedium) {
t.Fatalf("expected metadata to be collected again")
}
})
}

greetingByt, err := os.ReadFile(greetingPath)
require.NoError(t, err)
func TestAgentMetadata_Timing(t *testing.T) {
if runtime.GOOS == "windows" {
// Shell scripting in Windows is a pain, and we have already tested
// that the OS logic works in the simpler tests.
t.Skip()
}
testutil.SkipIfNotTiming(t)
t.Parallel()

var (
numGreetings = bytes.Count(greetingByt, []byte("hello"))
idealNumGreetings = time.Since(start) / (reportInterval * intervalUnit)
// We allow a 50% error margin because the report loop may backlog
// in CI and other toasters. In production, there is no hard
// guarantee on timing either, and the frontend gives similar
// wiggle room to the staleness of the value.
upperBound = int(idealNumGreetings) + 1
lowerBound = (int(idealNumGreetings) / 2)
)

if idealNumGreetings < 50 {
// There is an insufficient sample size.
continue
}
dir := t.TempDir()

t.Logf("numGreetings: %d, idealNumGreetings: %d", numGreetings, idealNumGreetings)
// The report loop may slow down on load, but it should never, ever
// speed up.
if numGreetings > upperBound {
t.Fatalf("too many greetings: %d > %d in %v", numGreetings, upperBound, time.Since(start))
} else if numGreetings < lowerBound {
t.Fatalf("too few greetings: %d < %d", numGreetings, lowerBound)
}
const reportInterval = 2
const intervalUnit = 100 * time.Millisecond
var (
greetingPath = filepath.Join(dir, "greeting")
script = "echo hello | tee -a " + greetingPath
)
_, client, _, _, _ := setupAgent(t, agentsdk.Manifest{
Metadata: []codersdk.WorkspaceAgentMetadataDescription{
{
Key: "greeting",
Interval: reportInterval,
Script: script,
},
{
Key: "bad",
Interval: reportInterval,
Script: "exit 1",
},
},
}, 0)

require.Eventually(t, func() bool {
return len(client.getMetadata()) == 2
}, testutil.WaitShort, testutil.IntervalMedium)

for start := time.Now(); time.Since(start) < testutil.WaitMedium; time.Sleep(testutil.IntervalMedium) {
md := client.getMetadata()
if len(md) != 2 {
panic("unexpected number of metadata entries")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's replace this with a require btw.

}
})

require.Equal(t, "hello\n", md["greeting"].Value)
require.Equal(t, "exit status 1", md["bad"].Error)

greetingByt, err := os.ReadFile(greetingPath)
require.NoError(t, err)

var (
numGreetings = bytes.Count(greetingByt, []byte("hello"))
idealNumGreetings = time.Since(start) / (reportInterval * intervalUnit)
// We allow a 50% error margin because the report loop may backlog
// in CI and other toasters. In production, there is no hard
// guarantee on timing either, and the frontend gives similar
// wiggle room to the staleness of the value.
upperBound = int(idealNumGreetings) + 1
lowerBound = (int(idealNumGreetings) / 2)
)

if idealNumGreetings < 50 {
// There is an insufficient sample size.
continue
}

t.Logf("numGreetings: %d, idealNumGreetings: %d", numGreetings, idealNumGreetings)
// The report loop may slow down on load, but it should never, ever
// speed up.
if numGreetings > upperBound {
t.Fatalf("too many greetings: %d > %d in %v", numGreetings, upperBound, time.Since(start))
} else if numGreetings < lowerBound {
t.Fatalf("too few greetings: %d < %d", numGreetings, lowerBound)
}
}
}

func TestAgent_Lifecycle(t *testing.T) {
Expand Down
21 changes: 21 additions & 0 deletions testutil/timing.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package testutil

import (
"flag"
"testing"
)

// We can't run timing-sensitive tests in CI because of the
// great variance in runner performance. Instead of not testing timing at all,
// we relegate it to humans manually running certain tests with the "-timing"
// flag from time to time.
//
// Eventually, we should run all timing tests in a self-hosted runner.

var timingFlag = flag.Bool("timing", false, "run timing-sensitive tests")
Copy link
Member

@mafredri mafredri Apr 21, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we won't be able to do go test ./... -timing unless all test packages import testutil, a shortcoming of using flags in tests. I think the only way around it would be to use build tags instead but maybe this won't be an issue.

Copy link
Member Author

@ammario ammario Apr 21, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I explored build tags but decided against it because it could lead to file bloat.

Let me know what you think about ba6f151.

Copy link
Member

@mafredri mafredri Apr 22, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I explored build tags but decided against it because it could lead to file bloat.

I was thinking it could be contained to the testutil package using the checking function you already implemented, so only 1 more file should be necessary. The build tag would then decide if the test becomes enabled or not.

Example:

//go:build timing

package testutil

var _ = func() any {
	timing = true
	return nil
}()

(Or use init, but the above avoids using that reserved name, tags/duped func in both files is another option.)

This would allow running the entire test suite with go test ./... -tags=timing. Or limiting to timing tests via go test ./... -tags=timing -run='_Timing$'.

Let me know what you think about ba6f151.

I think that looks alright since it requires an intent to run, otherwise generating go_tests.json would be too slow.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice idea, let's go that way. See new code.


func SkipIfNotTiming(t *testing.T) {
if !*timingFlag {
t.Skip("skipping timing-sensitive test")
}
}