chore: skip timing-sensitive AgentMetadata test in the standard suite

coder · ammario · May 2, 2023 · Apr 20, 2023 · Apr 21, 2023 · Apr 21, 2023
commit ca0125e6acea2cb0239ac5effca4dc1f4bb802aa
diff --git a/agent/agent_test.go b/agent/agent_test.go
@@ -1011,78 +1011,113 @@ func TestAgent_Metadata(t *testing.T) {
 	})
 
 	t.Run("Many", func(t *testing.T) {
+		t.Parallel()
+		script := "echo -n hello"
 		if runtime.GOOS == "windows" {
-			// Shell scripting in Windows is a pain, and we have already tested
-			// that the OS logic works in the simpler "Once" test above.
-			t.Skip()
+			script = "powershell " + script
 		}
-		t.Parallel()
-
-		dir := t.TempDir()
-
-		const reportInterval = 2
-		const intervalUnit = 100 * time.Millisecond
-		var (
-			greetingPath = filepath.Join(dir, "greeting")
-			script       = "echo hello | tee -a " + greetingPath
-		)
+		//nolint:dogsled
 		_, client, _, _, _ := setupAgent(t, agentsdk.Manifest{
 			Metadata: []codersdk.WorkspaceAgentMetadataDescription{
 				{
 					Key:      "greeting",
-					Interval: reportInterval,
+					Interval: 1,
 					Script:   script,
 				},
-				{
-					Key:      "bad",
-					Interval: reportInterval,
-					Script:   "exit 1",
-				},
 			},
 		}, 0)
 
+		var gotMd map[string]agentsdk.PostMetadataRequest
 		require.Eventually(t, func() bool {
-			return len(client.getMetadata()) == 2
+			gotMd = client.getMetadata()
+			return len(gotMd) == 1
 		}, testutil.WaitShort, testutil.IntervalMedium)
 
-		for start := time.Now(); time.Since(start) < testutil.WaitMedium; time.Sleep(testutil.IntervalMedium) {
-			md := client.getMetadata()
-			if len(md) != 2 {
-				panic("unexpected number of metadata entries")
-			}
+		collectedAt1 := gotMd["greeting"].CollectedAt
+		assert.Equal(t, "hello", gotMd["greeting"].Value)
 
-			require.Equal(t, "hello\n", md["greeting"].Value)
-			require.Equal(t, "exit status 1", md["bad"].Error)
+		if !assert.Eventually(t, func() bool {
+			gotMd = client.getMetadata()
+			return gotMd["greeting"].CollectedAt.After(collectedAt1)
+		}, testutil.WaitShort, testutil.IntervalMedium) {
+			t.Fatalf("expected metadata to be collected again")
+		}
+	})
+}
 
-			greetingByt, err := os.ReadFile(greetingPath)
-			require.NoError(t, err)
+func TestAgentMetadata_Timing(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		// Shell scripting in Windows is a pain, and we have already tested
+		// that the OS logic works in the simpler tests.
+		t.Skip()
+	}
+	testutil.SkipIfNotTiming(t)
+	t.Parallel()
 
-			var (
-				numGreetings      = bytes.Count(greetingByt, []byte("hello"))
-				idealNumGreetings = time.Since(start) / (reportInterval * intervalUnit)
-				// We allow a 50% error margin because the report loop may backlog
-				// in CI and other toasters. In production, there is no hard
-				// guarantee on timing either, and the frontend gives similar
-				// wiggle room to the staleness of the value.
-				upperBound = int(idealNumGreetings) + 1
-				lowerBound = (int(idealNumGreetings) / 2)
-			)
-
-			if idealNumGreetings < 50 {
-				// There is an insufficient sample size.
-				continue
-			}
+	dir := t.TempDir()
 
-			t.Logf("numGreetings: %d, idealNumGreetings: %d", numGreetings, idealNumGreetings)
-			// The report loop may slow down on load, but it should never, ever
-			// speed up.
-			if numGreetings > upperBound {
-				t.Fatalf("too many greetings: %d > %d in %v", numGreetings, upperBound, time.Since(start))
-			} else if numGreetings < lowerBound {
-				t.Fatalf("too few greetings: %d < %d", numGreetings, lowerBound)
-			}
+	const reportInterval = 2
+	const intervalUnit = 100 * time.Millisecond
+	var (
+		greetingPath = filepath.Join(dir, "greeting")
+		script       = "echo hello | tee -a " + greetingPath
+	)
+	_, client, _, _, _ := setupAgent(t, agentsdk.Manifest{
+		Metadata: []codersdk.WorkspaceAgentMetadataDescription{
+			{
+				Key:      "greeting",
+				Interval: reportInterval,
+				Script:   script,
+			},
+			{
+				Key:      "bad",
+				Interval: reportInterval,
+				Script:   "exit 1",
+			},
+		},
+	}, 0)
+
+	require.Eventually(t, func() bool {
+		return len(client.getMetadata()) == 2
+	}, testutil.WaitShort, testutil.IntervalMedium)
+
+	for start := time.Now(); time.Since(start) < testutil.WaitMedium; time.Sleep(testutil.IntervalMedium) {
+		md := client.getMetadata()
+		if len(md) != 2 {
+			panic("unexpected number of metadata entries")
 		}
-	})
+
+		require.Equal(t, "hello\n", md["greeting"].Value)
+		require.Equal(t, "exit status 1", md["bad"].Error)
+
+		greetingByt, err := os.ReadFile(greetingPath)
+		require.NoError(t, err)
+
+		var (
+			numGreetings      = bytes.Count(greetingByt, []byte("hello"))
+			idealNumGreetings = time.Since(start) / (reportInterval * intervalUnit)
+			// We allow a 50% error margin because the report loop may backlog
+			// in CI and other toasters. In production, there is no hard
+			// guarantee on timing either, and the frontend gives similar
+			// wiggle room to the staleness of the value.
+			upperBound = int(idealNumGreetings) + 1
+			lowerBound = (int(idealNumGreetings) / 2)
+		)
+
+		if idealNumGreetings < 50 {
+			// There is an insufficient sample size.
+			continue
+		}
+
+		t.Logf("numGreetings: %d, idealNumGreetings: %d", numGreetings, idealNumGreetings)
+		// The report loop may slow down on load, but it should never, ever
+		// speed up.
+		if numGreetings > upperBound {
+			t.Fatalf("too many greetings: %d > %d in %v", numGreetings, upperBound, time.Since(start))
+		} else if numGreetings < lowerBound {
+			t.Fatalf("too few greetings: %d < %d", numGreetings, lowerBound)
+		}
+	}
 }
 
 func TestAgent_Lifecycle(t *testing.T) {

diff --git a/testutil/timing.go b/testutil/timing.go
@@ -0,0 +1,21 @@
+package testutil
+
+import (
+	"flag"
+	"testing"
+)
+
+// We can't run timing-sensitive tests in CI because of the
+// great variance in runner performance. Instead of not testing timing at all,
+// we relegate it to humans manually running certain tests with the "-timing"
+// flag from time to time.
+//
+// Eventually, we should run all timing tests in a self-hosted runner.
+
+var timingFlag = flag.Bool("timing", false, "run timing-sensitive tests")
+
+func SkipIfNotTiming(t *testing.T) {
+	if !*timingFlag {
+		t.Skip("skipping timing-sensitive test")
+	}
+}