Skip to content

test(agent): fix service banner and metadata intervals #8516

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 54 additions & 62 deletions agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"encoding/binary"
"encoding/json"
"errors"
"flag"
"fmt"
"io"
"net"
Expand Down Expand Up @@ -52,21 +51,22 @@ const (
)

type Options struct {
Filesystem afero.Fs
LogDir string
TempDir string
ExchangeToken func(ctx context.Context) (string, error)
Client Client
ReconnectingPTYTimeout time.Duration
EnvironmentVariables map[string]string
Logger slog.Logger
IgnorePorts map[int]string
SSHMaxTimeout time.Duration
TailnetListenPort uint16
Subsystem codersdk.AgentSubsystem
Addresses []netip.Prefix

PrometheusRegistry *prometheus.Registry
Filesystem afero.Fs
LogDir string
TempDir string
ExchangeToken func(ctx context.Context) (string, error)
Client Client
ReconnectingPTYTimeout time.Duration
EnvironmentVariables map[string]string
Logger slog.Logger
IgnorePorts map[int]string
SSHMaxTimeout time.Duration
TailnetListenPort uint16
Subsystem codersdk.AgentSubsystem
Addresses []netip.Prefix
PrometheusRegistry *prometheus.Registry
ReportMetadataInterval time.Duration
ServiceBannerRefreshInterval time.Duration
}

type Client interface {
Expand Down Expand Up @@ -107,6 +107,12 @@ func New(options Options) Agent {
return "", nil
}
}
if options.ReportMetadataInterval == 0 {
options.ReportMetadataInterval = 1 * time.Minute
}
if options.ServiceBannerRefreshInterval == 0 {
options.ServiceBannerRefreshInterval = 2 * time.Minute
}

prometheusRegistry := options.PrometheusRegistry
if prometheusRegistry == nil {
Expand All @@ -115,25 +121,27 @@ func New(options Options) Agent {

ctx, cancelFunc := context.WithCancel(context.Background())
a := &agent{
tailnetListenPort: options.TailnetListenPort,
reconnectingPTYTimeout: options.ReconnectingPTYTimeout,
logger: options.Logger,
closeCancel: cancelFunc,
closed: make(chan struct{}),
envVars: options.EnvironmentVariables,
client: options.Client,
exchangeToken: options.ExchangeToken,
filesystem: options.Filesystem,
logDir: options.LogDir,
tempDir: options.TempDir,
lifecycleUpdate: make(chan struct{}, 1),
lifecycleReported: make(chan codersdk.WorkspaceAgentLifecycle, 1),
lifecycleStates: []agentsdk.PostLifecycleRequest{{State: codersdk.WorkspaceAgentLifecycleCreated}},
ignorePorts: options.IgnorePorts,
connStatsChan: make(chan *agentsdk.Stats, 1),
sshMaxTimeout: options.SSHMaxTimeout,
subsystem: options.Subsystem,
addresses: options.Addresses,
tailnetListenPort: options.TailnetListenPort,
reconnectingPTYTimeout: options.ReconnectingPTYTimeout,
logger: options.Logger,
closeCancel: cancelFunc,
closed: make(chan struct{}),
envVars: options.EnvironmentVariables,
client: options.Client,
exchangeToken: options.ExchangeToken,
filesystem: options.Filesystem,
logDir: options.LogDir,
tempDir: options.TempDir,
lifecycleUpdate: make(chan struct{}, 1),
lifecycleReported: make(chan codersdk.WorkspaceAgentLifecycle, 1),
lifecycleStates: []agentsdk.PostLifecycleRequest{{State: codersdk.WorkspaceAgentLifecycleCreated}},
ignorePorts: options.IgnorePorts,
connStatsChan: make(chan *agentsdk.Stats, 1),
reportMetadataInterval: options.ReportMetadataInterval,
serviceBannerRefreshInterval: options.ServiceBannerRefreshInterval,
sshMaxTimeout: options.SSHMaxTimeout,
subsystem: options.Subsystem,
addresses: options.Addresses,

prometheusRegistry: prometheusRegistry,
metrics: newAgentMetrics(prometheusRegistry),
Expand Down Expand Up @@ -165,13 +173,14 @@ type agent struct {
closed chan struct{}

envVars map[string]string
// manifest is atomic because values can change after reconnection.
manifest atomic.Pointer[agentsdk.Manifest]
// serviceBanner is atomic because it can change.
serviceBanner atomic.Pointer[codersdk.ServiceBannerConfig]
sessionToken atomic.Pointer[string]
sshServer *agentssh.Server
sshMaxTimeout time.Duration

manifest atomic.Pointer[agentsdk.Manifest] // manifest is atomic because values can change after reconnection.
reportMetadataInterval time.Duration
serviceBanner atomic.Pointer[codersdk.ServiceBannerConfig] // serviceBanner is atomic because it is periodically updated.
serviceBannerRefreshInterval time.Duration
sessionToken atomic.Pointer[string]
sshServer *agentssh.Server
sshMaxTimeout time.Duration

lifecycleUpdate chan struct{}
lifecycleReported chan codersdk.WorkspaceAgentLifecycle
Expand Down Expand Up @@ -283,17 +292,6 @@ func (a *agent) collectMetadata(ctx context.Context, md codersdk.WorkspaceAgentM
return result
}

// adjustIntervalForTests returns a duration of testInterval milliseconds long
// for tests and interval seconds long otherwise.
func adjustIntervalForTests(interval time.Duration, testInterval time.Duration) time.Duration {
// In tests we want to set shorter intervals because engineers are
// impatient.
if flag.Lookup("test.v") != nil {
return testInterval
}
return interval
}

type metadataResultAndKey struct {
result *codersdk.WorkspaceAgentMetadataResult
key string
Expand All @@ -315,12 +313,10 @@ func (t *trySingleflight) Do(key string, fn func()) {
}

func (a *agent) reportMetadataLoop(ctx context.Context) {
baseInterval := adjustIntervalForTests(time.Second, time.Millisecond*100)

const metadataLimit = 128

var (
baseTicker = time.NewTicker(baseInterval)
baseTicker = time.NewTicker(a.reportMetadataInterval)
lastCollectedAts = make(map[string]time.Time)
metadataResults = make(chan metadataResultAndKey, metadataLimit)
)
Expand Down Expand Up @@ -391,11 +387,7 @@ func (a *agent) reportMetadataLoop(ctx context.Context) {
continue
}
// The last collected value isn't quite stale yet, so we skip it.
if collectedAt.Add(
adjustIntervalForTests(
time.Duration(md.Interval)*time.Second,
time.Duration(md.Interval)*time.Millisecond*100),
).After(time.Now()) {
if collectedAt.Add(a.reportMetadataInterval).After(time.Now()) {
continue
}
}
Expand Down Expand Up @@ -506,7 +498,7 @@ func (a *agent) setLifecycle(ctx context.Context, state codersdk.WorkspaceAgentL
// not be fetched immediately; the expectation is that it is primed elsewhere
// (and must be done before the session actually starts).
func (a *agent) fetchServiceBannerLoop(ctx context.Context) {
ticker := time.NewTicker(adjustIntervalForTests(2*time.Minute, time.Millisecond*5))
ticker := time.NewTicker(a.serviceBannerRefreshInterval)
defer ticker.Stop()
for {
select {
Expand Down
30 changes: 19 additions & 11 deletions agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -424,8 +424,12 @@ func TestAgent_Session_TTY_MOTD_Update(t *testing.T) {

ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()

setSBInterval := func(_ *agenttest.Client, opts *agent.Options) {
opts.ServiceBannerRefreshInterval = 5 * time.Millisecond
}
//nolint:dogsled // Allow the blank identifiers.
conn, client, _, _, _ := setupAgent(t, agentsdk.Manifest{}, 0)
conn, client, _, _, _ := setupAgent(t, agentsdk.Manifest{}, 0, setSBInterval)
for _, test := range tests {
test := test
// Set new banner func and wait for the agent to call it to update the
Expand Down Expand Up @@ -1143,7 +1147,9 @@ func TestAgent_Metadata(t *testing.T) {
Script: echoHello,
},
},
}, 0)
}, 0, func(_ *agenttest.Client, opts *agent.Options) {
opts.ReportMetadataInterval = 100 * time.Millisecond
})

var gotMd map[string]agentsdk.PostMetadataRequest
require.Eventually(t, func() bool {
Expand Down Expand Up @@ -1174,23 +1180,23 @@ func TestAgent_Metadata(t *testing.T) {
Script: echoHello,
},
},
}, 0)
}, 0, func(_ *agenttest.Client, opts *agent.Options) {
opts.ReportMetadataInterval = testutil.IntervalFast
})

var gotMd map[string]agentsdk.PostMetadataRequest
require.Eventually(t, func() bool {
gotMd = client.GetMetadata()
return len(gotMd) == 1
}, testutil.WaitShort, testutil.IntervalMedium)
}, testutil.WaitShort, testutil.IntervalFast/2)

collectedAt1 := gotMd["greeting"].CollectedAt
if !assert.Equal(t, "hello", strings.TrimSpace(gotMd["greeting"].Value)) {
t.Errorf("got: %+v", gotMd)
}
require.Equal(t, "hello", strings.TrimSpace(gotMd["greeting"].Value))

if !assert.Eventually(t, func() bool {
gotMd = client.GetMetadata()
return gotMd["greeting"].CollectedAt.After(collectedAt1)
}, testutil.WaitShort, testutil.IntervalMedium) {
}, testutil.WaitShort, testutil.IntervalFast/2) {
t.Fatalf("expected metadata to be collected again")
}
})
Expand Down Expand Up @@ -1227,7 +1233,9 @@ func TestAgentMetadata_Timing(t *testing.T) {
Script: "exit 1",
},
},
}, 0)
}, 0, func(_ *agenttest.Client, opts *agent.Options) {
opts.ReportMetadataInterval = intervalUnit
})

require.Eventually(t, func() bool {
return len(client.GetMetadata()) == 2
Expand Down Expand Up @@ -1844,14 +1852,14 @@ func setupSSHCommand(t *testing.T, beforeArgs []string, afterArgs []string) (*pt

func setupSSHSession(
t *testing.T,
options agentsdk.Manifest,
manifest agentsdk.Manifest,
serviceBanner codersdk.ServiceBannerConfig,
prepareFS func(fs afero.Fs),
) *ssh.Session {
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()
//nolint:dogsled
conn, _, _, fs, _ := setupAgent(t, options, 0, func(c *agenttest.Client, _ *agent.Options) {
conn, _, _, fs, _ := setupAgent(t, manifest, 0, func(c *agenttest.Client, _ *agent.Options) {
c.SetServiceBannerFunc(func() (codersdk.ServiceBannerConfig, error) {
return serviceBanner, nil
})
Expand Down