Skip to content

feat: Collect agent SSH metrics #7584

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 42 commits into from
May 25, 2023
Merged
Changes from 1 commit
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
092e1d0
feat: Collect agent SSH metrics
mtojek May 17, 2023
0c0df91
more metrics
mtojek May 17, 2023
84b59ea
err
mtojek May 17, 2023
000586a
session metrics
mtojek May 18, 2023
11fb056
session error
mtojek May 18, 2023
07b2b0e
fix
mtojek May 18, 2023
e38a7be
fmt
mtojek May 18, 2023
ba4bb4d
WIP
mtojek May 18, 2023
0d0f300
Refactored to client_golang/prometheus
mtojek May 18, 2023
315b5ce
fix
mtojek May 18, 2023
43d5d40
fix
mtojek May 18, 2023
85f8860
refactor
mtojek May 18, 2023
7b26267
Merge branch 'main' into 6724-ssh-metrics
mtojek May 18, 2023
34f07fc
refactor
mtojek May 18, 2023
59fd585
fix test
mtojek May 18, 2023
8cd927c
fix
mtojek May 18, 2023
6eec4d7
fix
mtojek May 18, 2023
a059edf
fix
mtojek May 18, 2023
c004c04
fix
mtojek May 18, 2023
9b0e31a
Address PR comments
mtojek May 19, 2023
7d4ccce
x11HostnameError
mtojek May 19, 2023
90b351d
Remove callbacks
mtojek May 19, 2023
1773c24
failedConnectionsTotal
mtojek May 19, 2023
5ac27b7
connectionsTotal
mtojek May 19, 2023
6eb1a95
sftpConnectionsTotal
mtojek May 19, 2023
e3d7493
sessionError
mtojek May 19, 2023
9620452
sftpServerErrors
mtojek May 19, 2023
f05466c
remove handlerError
mtojek May 19, 2023
5887ee8
WIP
mtojek May 19, 2023
bb3602b
WIP
mtojek May 22, 2023
27fc9a0
WIP
mtojek May 22, 2023
3f4696b
Finish impl
mtojek May 23, 2023
8cd07f2
Aggregator: labels
mtojek May 23, 2023
a51cde9
Merge branch 'main' into 6724-ssh-metrics
mtojek May 23, 2023
389dd9f
TestAgent_Metrics_SSH
mtojek May 23, 2023
8e10d6d
Address PR comments
mtojek May 24, 2023
1858dc2
use labelIndex
mtojek May 24, 2023
8dde9f9
Merge branch 'main' into 6724-ssh-metrics
mtojek May 24, 2023
db725a3
Merge branch 'main' into 6724-ssh-metrics
mtojek May 25, 2023
f416287
PR comments part 1
mtojek May 25, 2023
4daf37d
PR comments part 2
mtojek May 25, 2023
d9203b8
PR comments part 3
mtojek May 25, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
TestAgent_Metrics_SSH
  • Loading branch information
mtojek committed May 23, 2023
commit 389dd9f5b6dbd6410ce762f854c93d56e3a0e097
119 changes: 116 additions & 3 deletions agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ import (
"github.com/google/uuid"
"github.com/pion/udp"
"github.com/pkg/sftp"
"github.com/prometheus/client_golang/prometheus"
promgo "github.com/prometheus/client_model/go"
"github.com/spf13/afero"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -1724,7 +1726,7 @@ func (c closeFunc) Close() error {
return c()
}

func setupAgent(t *testing.T, metadata agentsdk.Manifest, ptyTimeout time.Duration) (
func setupAgent(t *testing.T, metadata agentsdk.Manifest, ptyTimeout time.Duration, opts ...func(agent.Options) agent.Options) (
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: remove ptyTimeout and set a default value in options

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe I will push this in a separate PR. That function is used in ~30 places.

*codersdk.WorkspaceAgentConn,
*client,
<-chan *agentsdk.Stats,
Expand All @@ -1749,12 +1751,19 @@ func setupAgent(t *testing.T, metadata agentsdk.Manifest, ptyTimeout time.Durati
statsChan: statsCh,
coordinator: coordinator,
}
closer := agent.New(agent.Options{

options := agent.Options{
Client: c,
Filesystem: fs,
Logger: logger.Named("agent"),
ReconnectingPTYTimeout: ptyTimeout,
})
}

for _, opt := range opts {
options = opt(options)
}

closer := agent.New(options)
t.Cleanup(func() {
_ = closer.Close()
})
Expand Down Expand Up @@ -1979,3 +1988,107 @@ func tempDirUnixSocket(t *testing.T) string {

return t.TempDir()
}

// TestAgent_Metrics_SSH starts an agent wired to a dedicated Prometheus
// registry, opens an SSH shell session against it, and checks that the
// registry eventually exposes the expected agent counters.
func TestAgent_Metrics_SSH(t *testing.T) {
	t.Parallel()
	ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
	defer cancel()

	registry := prometheus.NewRegistry()

	//nolint:dogsled
	conn, _, _, _, _ := setupAgent(t, agentsdk.Manifest{}, 0, func(o agent.Options) agent.Options {
		o.PrometheusRegistry = registry
		return o
	})

	sshClient, err := conn.SSHClient(ctx)
	require.NoError(t, err)
	defer sshClient.Close()
	session, err := sshClient.NewSession()
	require.NoError(t, err)
	defer session.Close()
	stdin, err := session.StdinPipe()
	require.NoError(t, err)
	err = session.Shell()
	require.NoError(t, err)

	// The entries are compared positionally against the gathered metric
	// families, so they are listed in name order.
	expected := []agentsdk.AgentMetric{
		{
			Name:  "agent_reconnecting_pty_connections_total",
			Type:  agentsdk.AgentMetricTypeCounter,
			Value: 0,
		},
		{
			Name:  "agent_sessions_total",
			Type:  agentsdk.AgentMetricTypeCounter,
			Value: 1,
			Labels: []agentsdk.AgentMetricLabel{
				{
					Name:  "magic_type",
					Value: "ssh",
				},
				{
					Name:  "pty",
					Value: "no",
				},
			},
		},
		{
			Name:  "agent_ssh_server_failed_connections_total",
			Type:  agentsdk.AgentMetricTypeCounter,
			Value: 0,
		},
		{
			Name:  "agent_ssh_server_sftp_connections_total",
			Type:  agentsdk.AgentMetricTypeCounter,
			Value: 0,
		},
		{
			Name:  "agent_ssh_server_sftp_server_errors_total",
			Type:  agentsdk.AgentMetricTypeCounter,
			Value: 0,
		},
	}

	// Poll until the registry reports the expected metrics. The condition is
	// invoked from a goroutine owned by Eventually, so it only touches its own
	// local variables and never writes state that this goroutine reads later.
	assert.Eventually(t, func() bool {
		families, gatherErr := registry.Gather()
		if gatherErr != nil {
			return false
		}

		if len(expected) != len(families) {
			return false
		}

		return verifyCollectedMetrics(t, expected, families)
	}, testutil.WaitLong, testutil.IntervalFast)

	// Gather once more on the test goroutine so the final, fatal assertions
	// operate on data that is not shared with the polling goroutine.
	actual, err := registry.Gather()
	require.NoError(t, err)
	require.Len(t, actual, len(expected))
	require.True(t, verifyCollectedMetrics(t, expected, actual), "collected metrics do not match expected: %v", actual)

	// Closing stdin ends the shell, after which the session must exit cleanly.
	_ = stdin.Close()
	err = session.Wait()
	require.NoError(t, err)
}

// verifyCollectedMetrics reports whether the gathered Prometheus metric
// families match the expected agent metrics, comparing them position by
// position.
//
// For every expected entry it compares the family name, the lower-cased family
// type, the counter value of each sample in the family, and every label of
// each sample (name and value) against the expected label at the same index.
// Families beyond len(expected) are ignored.
//
// The function never fails the test itself; it returns false on the first
// mismatch. That makes it usable as the condition of assert.Eventually, which
// polls the condition from its own goroutine where FailNow-style assertions
// must not be called, and lets the caller retry while metrics are still being
// collected.
func verifyCollectedMetrics(t *testing.T, expected []agentsdk.AgentMetric, actual []*promgo.MetricFamily) bool {
	t.Helper()

	// Guard the positional lookups below against a short gather result.
	if len(actual) < len(expected) {
		return false
	}

	for i, e := range expected {
		family := actual[i]
		if e.Name != family.GetName() {
			return false
		}
		if string(e.Type) != strings.ToLower(family.GetType().String()) {
			return false
		}

		for _, m := range family.GetMetric() {
			if e.Value != m.Counter.GetValue() {
				return false
			}

			labels := m.GetLabel()
			// More labels than expected can never match and would otherwise
			// index past the end of e.Labels.
			if len(labels) > len(e.Labels) {
				return false
			}
			for j, lbl := range labels {
				if e.Labels[j].Name != lbl.GetName() || e.Labels[j].Value != lbl.GetValue() {
					return false
				}
			}
		}
	}
	return true
}