Skip to content

Commit fdd60d3

Browse files
authored
fix: fix MetricsAggregator check for metric sameness (#11508)
Fixes #11451 A refactor of the Agent API passes metrics as protobufs, which include pointers to label name/value pairs. The aggregator tested for sameness by doing a shallow compare of label values, which for different stats reports would compare unequal because the pointers would be different. This fix does a deep compare. While testing I also noted that we neglect to compare template names. This is unlikely to have caused any issue in practice, since the combination of username/workspace is unique, but in the context of comparing metric labels we should do the comparison. If a user creates a workspace, deletes it, then recreates from a different template, we could in principle have reported incorrect stats for the old template.
1 parent 21093c0 commit fdd60d3

File tree

5 files changed

+335
-2
lines changed

5 files changed

+335
-2
lines changed

agent/proto/compare.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
package proto
2+
3+
func LabelsEqual(a, b []*Stats_Metric_Label) bool {
4+
am := make(map[string]string, len(a))
5+
for _, lbl := range a {
6+
v := lbl.GetValue()
7+
if v == "" {
8+
// Prometheus considers empty labels as equivalent to being absent
9+
continue
10+
}
11+
am[lbl.GetName()] = lbl.GetValue()
12+
}
13+
lenB := 0
14+
for _, lbl := range b {
15+
v := lbl.GetValue()
16+
if v == "" {
17+
// Prometheus considers empty labels as equivalent to being absent
18+
continue
19+
}
20+
lenB++
21+
if am[lbl.GetName()] != v {
22+
return false
23+
}
24+
}
25+
return len(am) == lenB
26+
}

agent/proto/compare_test.go

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
package proto_test
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/require"
7+
8+
"github.com/coder/coder/v2/agent/proto"
9+
)
10+
11+
func TestLabelsEqual(t *testing.T) {
12+
t.Parallel()
13+
for _, tc := range []struct {
14+
name string
15+
a []*proto.Stats_Metric_Label
16+
b []*proto.Stats_Metric_Label
17+
eq bool
18+
}{
19+
{
20+
name: "mainlineEq",
21+
a: []*proto.Stats_Metric_Label{
22+
{Name: "credulity", Value: "sus"},
23+
{Name: "color", Value: "aquamarine"},
24+
},
25+
b: []*proto.Stats_Metric_Label{
26+
{Name: "credulity", Value: "sus"},
27+
{Name: "color", Value: "aquamarine"},
28+
},
29+
eq: true,
30+
},
31+
{
32+
name: "emptyValue",
33+
a: []*proto.Stats_Metric_Label{
34+
{Name: "credulity", Value: "sus"},
35+
{Name: "color", Value: "aquamarine"},
36+
{Name: "singularity", Value: ""},
37+
},
38+
b: []*proto.Stats_Metric_Label{
39+
{Name: "credulity", Value: "sus"},
40+
{Name: "color", Value: "aquamarine"},
41+
},
42+
eq: true,
43+
},
44+
{
45+
name: "extra",
46+
a: []*proto.Stats_Metric_Label{
47+
{Name: "credulity", Value: "sus"},
48+
{Name: "color", Value: "aquamarine"},
49+
{Name: "opacity", Value: "seyshells"},
50+
},
51+
b: []*proto.Stats_Metric_Label{
52+
{Name: "credulity", Value: "sus"},
53+
{Name: "color", Value: "aquamarine"},
54+
},
55+
eq: false,
56+
},
57+
{
58+
name: "different",
59+
a: []*proto.Stats_Metric_Label{
60+
{Name: "credulity", Value: "sus"},
61+
{Name: "color", Value: "aquamarine"},
62+
},
63+
b: []*proto.Stats_Metric_Label{
64+
{Name: "credulity", Value: "legit"},
65+
{Name: "color", Value: "aquamarine"},
66+
},
67+
eq: false,
68+
},
69+
} {
70+
tc := tc
71+
t.Run(tc.name, func(t *testing.T) {
72+
t.Parallel()
73+
require.Equal(t, tc.eq, proto.LabelsEqual(tc.a, tc.b))
74+
require.Equal(t, tc.eq, proto.LabelsEqual(tc.b, tc.a))
75+
})
76+
}
77+
}

coderd/prometheusmetrics/aggregator.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import (
55
"time"
66

77
"github.com/prometheus/client_golang/prometheus"
8-
"golang.org/x/exp/slices"
98
"golang.org/x/xerrors"
109

1110
"cdr.dev/slog"
@@ -68,7 +67,12 @@ type annotatedMetric struct {
6867
var _ prometheus.Collector = new(MetricsAggregator)
6968

7069
func (am *annotatedMetric) is(req updateRequest, m *agentproto.Stats_Metric) bool {
71-
return am.username == req.username && am.workspaceName == req.workspaceName && am.agentName == req.agentName && am.Name == m.Name && slices.Equal(am.Labels, m.Labels)
70+
return am.username == req.username &&
71+
am.workspaceName == req.workspaceName &&
72+
am.agentName == req.agentName &&
73+
am.templateName == req.templateName &&
74+
am.Name == m.Name &&
75+
agentproto.LabelsEqual(am.Labels, m.Labels)
7276
}
7377

7478
func (am *annotatedMetric) asPrometheus() (prometheus.Metric, error) {
Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
package prometheusmetrics
2+
3+
import (
4+
"testing"
5+
"time"
6+
7+
"github.com/stretchr/testify/require"
8+
9+
"github.com/coder/coder/v2/agent/proto"
10+
)
11+
12+
func TestAnnotatedMetric_Is(t *testing.T) {
13+
t.Parallel()
14+
am1 := &annotatedMetric{
15+
Stats_Metric: &proto.Stats_Metric{
16+
Name: "met",
17+
Type: proto.Stats_Metric_COUNTER,
18+
Value: 1,
19+
Labels: []*proto.Stats_Metric_Label{
20+
{Name: "rarity", Value: "blue moon"},
21+
{Name: "certainty", Value: "yes"},
22+
},
23+
},
24+
username: "spike",
25+
workspaceName: "work",
26+
agentName: "janus",
27+
templateName: "tempe",
28+
expiryDate: time.Now(),
29+
}
30+
for _, tc := range []struct {
31+
name string
32+
req updateRequest
33+
m *proto.Stats_Metric
34+
is bool
35+
}{
36+
{
37+
name: "OK",
38+
req: updateRequest{
39+
username: "spike",
40+
workspaceName: "work",
41+
agentName: "janus",
42+
templateName: "tempe",
43+
metrics: nil,
44+
timestamp: time.Now().Add(-5 * time.Second),
45+
},
46+
m: &proto.Stats_Metric{
47+
Name: "met",
48+
Type: proto.Stats_Metric_COUNTER,
49+
Value: 2,
50+
Labels: []*proto.Stats_Metric_Label{
51+
{Name: "rarity", Value: "blue moon"},
52+
{Name: "certainty", Value: "yes"},
53+
},
54+
},
55+
is: true,
56+
},
57+
{
58+
name: "missingLabel",
59+
req: updateRequest{
60+
username: "spike",
61+
workspaceName: "work",
62+
agentName: "janus",
63+
templateName: "tempe",
64+
metrics: nil,
65+
timestamp: time.Now().Add(-5 * time.Second),
66+
},
67+
m: &proto.Stats_Metric{
68+
Name: "met",
69+
Type: proto.Stats_Metric_COUNTER,
70+
Value: 2,
71+
Labels: []*proto.Stats_Metric_Label{
72+
{Name: "certainty", Value: "yes"},
73+
},
74+
},
75+
is: false,
76+
},
77+
{
78+
name: "wrongLabelValue",
79+
req: updateRequest{
80+
username: "spike",
81+
workspaceName: "work",
82+
agentName: "janus",
83+
templateName: "tempe",
84+
metrics: nil,
85+
timestamp: time.Now().Add(-5 * time.Second),
86+
},
87+
m: &proto.Stats_Metric{
88+
Name: "met",
89+
Type: proto.Stats_Metric_COUNTER,
90+
Value: 2,
91+
Labels: []*proto.Stats_Metric_Label{
92+
{Name: "rarity", Value: "blue moon"},
93+
{Name: "certainty", Value: "inshallah"},
94+
},
95+
},
96+
is: false,
97+
},
98+
{
99+
name: "wrongMetricName",
100+
req: updateRequest{
101+
username: "spike",
102+
workspaceName: "work",
103+
agentName: "janus",
104+
templateName: "tempe",
105+
metrics: nil,
106+
timestamp: time.Now().Add(-5 * time.Second),
107+
},
108+
m: &proto.Stats_Metric{
109+
Name: "cub",
110+
Type: proto.Stats_Metric_COUNTER,
111+
Value: 2,
112+
Labels: []*proto.Stats_Metric_Label{
113+
{Name: "rarity", Value: "blue moon"},
114+
{Name: "certainty", Value: "yes"},
115+
},
116+
},
117+
is: false,
118+
},
119+
{
120+
name: "wrongUsername",
121+
req: updateRequest{
122+
username: "steve",
123+
workspaceName: "work",
124+
agentName: "janus",
125+
templateName: "tempe",
126+
metrics: nil,
127+
timestamp: time.Now().Add(-5 * time.Second),
128+
},
129+
m: &proto.Stats_Metric{
130+
Name: "met",
131+
Type: proto.Stats_Metric_COUNTER,
132+
Value: 2,
133+
Labels: []*proto.Stats_Metric_Label{
134+
{Name: "rarity", Value: "blue moon"},
135+
{Name: "certainty", Value: "yes"},
136+
},
137+
},
138+
is: false,
139+
},
140+
{
141+
name: "wrongWorkspaceName",
142+
req: updateRequest{
143+
username: "spike",
144+
workspaceName: "play",
145+
agentName: "janus",
146+
templateName: "tempe",
147+
metrics: nil,
148+
timestamp: time.Now().Add(-5 * time.Second),
149+
},
150+
m: &proto.Stats_Metric{
151+
Name: "met",
152+
Type: proto.Stats_Metric_COUNTER,
153+
Value: 2,
154+
Labels: []*proto.Stats_Metric_Label{
155+
{Name: "rarity", Value: "blue moon"},
156+
{Name: "certainty", Value: "yes"},
157+
},
158+
},
159+
is: false,
160+
},
161+
{
162+
name: "wrongAgentName",
163+
req: updateRequest{
164+
username: "spike",
165+
workspaceName: "work",
166+
agentName: "bond",
167+
templateName: "tempe",
168+
metrics: nil,
169+
timestamp: time.Now().Add(-5 * time.Second),
170+
},
171+
m: &proto.Stats_Metric{
172+
Name: "met",
173+
Type: proto.Stats_Metric_COUNTER,
174+
Value: 2,
175+
Labels: []*proto.Stats_Metric_Label{
176+
{Name: "rarity", Value: "blue moon"},
177+
{Name: "certainty", Value: "yes"},
178+
},
179+
},
180+
is: false,
181+
},
182+
{
183+
name: "wrongTemplateName",
184+
req: updateRequest{
185+
username: "spike",
186+
workspaceName: "work",
187+
agentName: "janus",
188+
templateName: "phoenix",
189+
metrics: nil,
190+
timestamp: time.Now().Add(-5 * time.Second),
191+
},
192+
m: &proto.Stats_Metric{
193+
Name: "met",
194+
Type: proto.Stats_Metric_COUNTER,
195+
Value: 2,
196+
Labels: []*proto.Stats_Metric_Label{
197+
{Name: "rarity", Value: "blue moon"},
198+
{Name: "certainty", Value: "yes"},
199+
},
200+
},
201+
is: false,
202+
},
203+
} {
204+
tc := tc
205+
t.Run(tc.name, func(t *testing.T) {
206+
t.Parallel()
207+
require.Equal(t, tc.is, am1.is(tc.req, tc.m))
208+
})
209+
}
210+
}

coderd/prometheusmetrics/aggregator_test.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,19 @@ func TestUpdateMetrics_MetricsDoNotExpire(t *testing.T) {
5151
given1 := []*agentproto.Stats_Metric{
5252
{Name: "a_counter_one", Type: agentproto.Stats_Metric_COUNTER, Value: 1},
5353
{Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: 2},
54+
// Tests that we update labels correctly when they have extra labels
55+
{Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: 27, Labels: []*agentproto.Stats_Metric_Label{
56+
{Name: "lizz", Value: "rizz"},
57+
}},
5458
{Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 3},
5559
}
5660

5761
given2 := []*agentproto.Stats_Metric{
5862
{Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: 4},
63+
// Tests that we update labels correctly when they have extra labels
64+
{Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: -9, Labels: []*agentproto.Stats_Metric_Label{
65+
{Name: "lizz", Value: "rizz"},
66+
}},
5967
{Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 5},
6068
{Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 2, Labels: []*agentproto.Stats_Metric_Label{
6169
{Name: "foobar", Value: "Foobaz"},
@@ -73,6 +81,13 @@ func TestUpdateMetrics_MetricsDoNotExpire(t *testing.T) {
7381
expected := []*agentproto.Stats_Metric{
7482
{Name: "a_counter_one", Type: agentproto.Stats_Metric_COUNTER, Value: 1, Labels: commonLabels},
7583
{Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: 4, Labels: commonLabels},
84+
{Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: -9, Labels: []*agentproto.Stats_Metric_Label{
85+
{Name: "agent_name", Value: testAgentName},
86+
{Name: "lizz", Value: "rizz"},
87+
{Name: "username", Value: testUsername},
88+
{Name: "workspace_name", Value: testWorkspaceName},
89+
{Name: "template_name", Value: testTemplateName},
90+
}},
7691
{Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 5, Labels: commonLabels},
7792
{Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 2, Labels: []*agentproto.Stats_Metric_Label{
7893
{Name: "agent_name", Value: testAgentName},
@@ -111,6 +126,7 @@ func TestUpdateMetrics_MetricsDoNotExpire(t *testing.T) {
111126

112127
func verifyCollectedMetrics(t *testing.T, expected []*agentproto.Stats_Metric, actual []prometheus.Metric) bool {
113128
if len(expected) != len(actual) {
129+
t.Logf("expected %d metrics, got %d", len(expected), len(actual))
114130
return false
115131
}
116132

0 commit comments

Comments
 (0)