Skip to content

Commit 564e531

Browse files
author
Grant Griffiths
committed
Add Snapshot Controller e2e metric tests
Signed-off-by: Grant Griffiths <ggriffiths@purestorage.com>
1 parent 9739592 commit 564e531

File tree

13 files changed

+467
-62
lines changed

13 files changed

+467
-62
lines changed

cluster/addons/volumesnapshots/volume-snapshot-controller/volume-snapshot-controller-deployment.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ spec:
2222
serviceAccount: volume-snapshot-controller
2323
containers:
2424
- name: volume-snapshot-controller
25-
image: k8s.gcr.io/sig-storage/snapshot-controller:v3.0.2
25+
image: k8s.gcr.io/sig-storage/snapshot-controller:v4.0.0
2626
args:
2727
- "--v=5"
28+
- "--metrics-path=/metrics"
29+
- "--http-endpoint=:9102"

test/e2e/apimachinery/garbage_collector.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ func verifyRemainingObjects(f *framework.Framework, objects map[string]int) (boo
259259
func gatherMetrics(f *framework.Framework) {
260260
ginkgo.By("Gathering metrics")
261261
var summary framework.TestDataSummary
262-
grabber, err := e2emetrics.NewMetricsGrabber(f.ClientSet, f.KubemarkExternalClusterClientSet, false, false, true, false, false)
262+
grabber, err := e2emetrics.NewMetricsGrabber(f.ClientSet, f.KubemarkExternalClusterClientSet, false, false, true, false, false, false)
263263
if err != nil {
264264
framework.Logf("Failed to create MetricsGrabber. Skipping metrics gathering.")
265265
} else {

test/e2e/framework/framework.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ func (f *Framework) BeforeEach() {
296296

297297
gatherMetricsAfterTest := TestContext.GatherMetricsAfterTest == "true" || TestContext.GatherMetricsAfterTest == "master"
298298
if gatherMetricsAfterTest && TestContext.IncludeClusterAutoscalerMetrics {
299-
grabber, err := e2emetrics.NewMetricsGrabber(f.ClientSet, f.KubemarkExternalClusterClientSet, !ProviderIs("kubemark"), false, false, false, TestContext.IncludeClusterAutoscalerMetrics)
299+
grabber, err := e2emetrics.NewMetricsGrabber(f.ClientSet, f.KubemarkExternalClusterClientSet, !ProviderIs("kubemark"), false, false, false, TestContext.IncludeClusterAutoscalerMetrics, false)
300300
if err != nil {
301301
Logf("Failed to create MetricsGrabber (skipping ClusterAutoscaler metrics gathering before test): %v", err)
302302
} else {
@@ -449,7 +449,7 @@ func (f *Framework) AfterEach() {
449449
ginkgo.By("Gathering metrics")
450450
// Grab apiserver, scheduler, controller-manager metrics and (optionally) nodes' kubelet metrics.
451451
grabMetricsFromKubelets := TestContext.GatherMetricsAfterTest != "master" && !ProviderIs("kubemark")
452-
grabber, err := e2emetrics.NewMetricsGrabber(f.ClientSet, f.KubemarkExternalClusterClientSet, grabMetricsFromKubelets, true, true, true, TestContext.IncludeClusterAutoscalerMetrics)
452+
grabber, err := e2emetrics.NewMetricsGrabber(f.ClientSet, f.KubemarkExternalClusterClientSet, grabMetricsFromKubelets, true, true, true, TestContext.IncludeClusterAutoscalerMetrics, false)
453453
if err != nil {
454454
Logf("Failed to create MetricsGrabber (skipping metrics gathering): %v", err)
455455
} else {

test/e2e/framework/metrics/kubelet_metrics.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ func getKubeletMetricsFromNode(c clientset.Interface, nodeName string) (KubeletM
139139
if c == nil {
140140
return GrabKubeletMetricsWithoutProxy(nodeName, "/metrics")
141141
}
142-
grabber, err := NewMetricsGrabber(c, nil, true, false, false, false, false)
142+
grabber, err := NewMetricsGrabber(c, nil, true, false, false, false, false, false)
143143
if err != nil {
144144
return KubeletMetrics{}, err
145145
}

test/e2e/framework/metrics/metrics_grabber.go

Lines changed: 94 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -38,40 +38,48 @@ const (
3838
// kubeControllerManagerPort is the default port for the controller manager status server.
3939
kubeControllerManagerPort = 10257
4040
metricsProxyPod = "metrics-proxy"
41+
// snapshotControllerPort is the default port of the snapshot controller's HTTP endpoint, which serves its metrics.
42+
snapshotControllerPort = 9102
4143
)
4244

4345
// Collection is a collection of metrics grabbed from the individual components.
4446
type Collection struct {
45-
APIServerMetrics APIServerMetrics
46-
ControllerManagerMetrics ControllerManagerMetrics
47-
KubeletMetrics map[string]KubeletMetrics
48-
SchedulerMetrics SchedulerMetrics
49-
ClusterAutoscalerMetrics ClusterAutoscalerMetrics
47+
APIServerMetrics APIServerMetrics
48+
ControllerManagerMetrics ControllerManagerMetrics
49+
SnapshotControllerMetrics SnapshotControllerMetrics
50+
KubeletMetrics map[string]KubeletMetrics
51+
SchedulerMetrics SchedulerMetrics
52+
ClusterAutoscalerMetrics ClusterAutoscalerMetrics
5053
}
5154

5255
// Grabber provides functions which grab metrics from components
5356
type Grabber struct {
54-
client clientset.Interface
55-
externalClient clientset.Interface
56-
grabFromAPIServer bool
57-
grabFromControllerManager bool
58-
grabFromKubelets bool
59-
grabFromScheduler bool
60-
grabFromClusterAutoscaler bool
61-
kubeScheduler string
62-
waitForSchedulerReadyOnce sync.Once
63-
kubeControllerManager string
64-
waitForControllerManagerReadyOnce sync.Once
57+
client clientset.Interface
58+
externalClient clientset.Interface
59+
grabFromAPIServer bool
60+
grabFromControllerManager bool
61+
grabFromKubelets bool
62+
grabFromScheduler bool
63+
grabFromClusterAutoscaler bool
64+
grabFromSnapshotController bool
65+
kubeScheduler string
66+
waitForSchedulerReadyOnce sync.Once
67+
kubeControllerManager string
68+
waitForControllerManagerReadyOnce sync.Once
69+
snapshotController string
70+
waitForSnapshotControllerReadyOnce sync.Once
6571
}
6672

6773
// NewMetricsGrabber returns a new, initialized metrics Grabber.
68-
func NewMetricsGrabber(c clientset.Interface, ec clientset.Interface, kubelets bool, scheduler bool, controllers bool, apiServer bool, clusterAutoscaler bool) (*Grabber, error) {
74+
func NewMetricsGrabber(c clientset.Interface, ec clientset.Interface, kubelets bool, scheduler bool, controllers bool, apiServer bool, clusterAutoscaler bool, snapshotController bool) (*Grabber, error) {
6975

7076
kubeScheduler := ""
7177
kubeControllerManager := ""
78+
snapshotControllerManager := ""
7279

7380
regKubeScheduler := regexp.MustCompile("kube-scheduler-.*")
7481
regKubeControllerManager := regexp.MustCompile("kube-controller-manager-.*")
82+
regSnapshotController := regexp.MustCompile("volume-snapshot-controller.*")
7583

7684
podList, err := c.CoreV1().Pods(metav1.NamespaceSystem).List(context.TODO(), metav1.ListOptions{})
7785
if err != nil {
@@ -87,7 +95,10 @@ func NewMetricsGrabber(c clientset.Interface, ec clientset.Interface, kubelets b
8795
if regKubeControllerManager.MatchString(pod.Name) {
8896
kubeControllerManager = pod.Name
8997
}
90-
if kubeScheduler != "" && kubeControllerManager != "" {
98+
if regSnapshotController.MatchString(pod.Name) {
99+
snapshotControllerManager = pod.Name
100+
}
101+
if kubeScheduler != "" && kubeControllerManager != "" && snapshotControllerManager != "" {
91102
break
92103
}
93104
}
@@ -99,20 +110,26 @@ func NewMetricsGrabber(c clientset.Interface, ec clientset.Interface, kubelets b
99110
controllers = false
100111
klog.Warningf("Can't find kube-controller-manager pod. Grabbing metrics from kube-controller-manager is disabled.")
101112
}
113+
if snapshotControllerManager == "" {
114+
snapshotController = false
115+
klog.Warningf("Can't find snapshot-controller pod. Grabbing metrics from snapshot-controller is disabled.")
116+
}
102117
if ec == nil {
103118
klog.Warningf("Did not receive an external client interface. Grabbing metrics from ClusterAutoscaler is disabled.")
104119
}
105120

106121
return &Grabber{
107-
client: c,
108-
externalClient: ec,
109-
grabFromAPIServer: apiServer,
110-
grabFromControllerManager: controllers,
111-
grabFromKubelets: kubelets,
112-
grabFromScheduler: scheduler,
113-
grabFromClusterAutoscaler: clusterAutoscaler,
114-
kubeScheduler: kubeScheduler,
115-
kubeControllerManager: kubeControllerManager,
122+
client: c,
123+
externalClient: ec,
124+
grabFromAPIServer: apiServer,
125+
grabFromControllerManager: controllers,
126+
grabFromKubelets: kubelets,
127+
grabFromScheduler: scheduler,
128+
grabFromClusterAutoscaler: clusterAutoscaler,
129+
grabFromSnapshotController: snapshotController,
130+
kubeScheduler: kubeScheduler,
131+
kubeControllerManager: kubeControllerManager,
132+
snapshotController: snapshotControllerManager,
116133
}, nil
117134
}
118135

@@ -220,6 +237,48 @@ func (g *Grabber) GrabFromControllerManager() (ControllerManagerMetrics, error)
220237
return parseControllerManagerMetrics(output)
221238
}
222239

240+
// GrabFromSnapshotController returns metrics from the snapshot controller
241+
func (g *Grabber) GrabFromSnapshotController(podName string, port int) (SnapshotControllerMetrics, error) {
242+
if g.snapshotController == "" {
243+
return SnapshotControllerMetrics{}, fmt.Errorf("SnapshotController pod is not registered. Skipping SnapshotController's metrics gathering")
244+
}
245+
246+
// Use overrides if provided via test config flags.
247+
// Otherwise, use the default snapshot controller pod name and port.
248+
if podName == "" {
249+
podName = g.snapshotController
250+
}
251+
if port == 0 {
252+
port = snapshotControllerPort
253+
}
254+
255+
var err error
256+
g.waitForSnapshotControllerReadyOnce.Do(func() {
257+
if readyErr := e2epod.WaitForPodsReady(g.client, metav1.NamespaceSystem, podName, 0); readyErr != nil {
258+
err = fmt.Errorf("error waiting for snapshot controller pod to be ready: %w", readyErr)
259+
return
260+
}
261+
262+
var lastMetricsFetchErr error
263+
if metricsWaitErr := wait.PollImmediate(time.Second, time.Minute, func() (bool, error) {
264+
_, lastMetricsFetchErr = g.getMetricsFromPod(g.client, podName, metav1.NamespaceSystem, port)
265+
return lastMetricsFetchErr == nil, nil
266+
}); metricsWaitErr != nil {
267+
err = fmt.Errorf("error waiting for snapshot controller pod to expose metrics: %v; %v", metricsWaitErr, lastMetricsFetchErr)
268+
return
269+
}
270+
})
271+
if err != nil {
272+
return SnapshotControllerMetrics{}, err
273+
}
274+
275+
output, err := g.getMetricsFromPod(g.client, podName, metav1.NamespaceSystem, port)
276+
if err != nil {
277+
return SnapshotControllerMetrics{}, err
278+
}
279+
return parseSnapshotControllerMetrics(output)
280+
}
281+
223282
// GrabFromAPIServer returns metrics from API server
224283
func (g *Grabber) GrabFromAPIServer() (APIServerMetrics, error) {
225284
output, err := g.getMetricsFromAPIServer()
@@ -257,6 +316,14 @@ func (g *Grabber) Grab() (Collection, error) {
257316
result.ControllerManagerMetrics = metrics
258317
}
259318
}
319+
if g.grabFromSnapshotController {
320+
metrics, err := g.GrabFromSnapshotController(g.snapshotController, snapshotControllerPort)
321+
if err != nil {
322+
errs = append(errs, err)
323+
} else {
324+
result.SnapshotControllerMetrics = metrics
325+
}
326+
}
260327
if g.grabFromClusterAutoscaler {
261328
metrics, err := g.GrabFromClusterAutoscaler()
262329
if err != nil {
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*
2+
Copyright 2021 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package metrics
18+
19+
import "k8s.io/component-base/metrics/testutil"
20+
21+
// SnapshotControllerMetrics is metrics for the snapshot controller
22+
type SnapshotControllerMetrics testutil.Metrics
23+
24+
// Equal returns true if all metrics are the same as the arguments.
25+
func (m *SnapshotControllerMetrics) Equal(o SnapshotControllerMetrics) bool {
26+
return (*testutil.Metrics)(m).Equal(testutil.Metrics(o))
27+
}
28+
29+
func newSnapshotControllerMetrics() SnapshotControllerMetrics {
30+
result := testutil.NewMetrics()
31+
return SnapshotControllerMetrics(result)
32+
}
33+
34+
func parseSnapshotControllerMetrics(data string) (SnapshotControllerMetrics, error) {
35+
result := newSnapshotControllerMetrics()
36+
if err := testutil.ParseMetrics(data, (*testutil.Metrics)(&result)); err != nil {
37+
return SnapshotControllerMetrics{}, err
38+
}
39+
return result, nil
40+
}

test/e2e/framework/test_context.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,12 @@ type TestContextType struct {
182182

183183
// DockerConfigFile is a file that contains credentials which can be used to pull images from certain private registries, needed for a test.
184184
DockerConfigFile string
185+
186+
// SnapshotControllerPodName is the name used for identifying the snapshot controller pod.
187+
SnapshotControllerPodName string
188+
189+
// SnapshotControllerHTTPPort is the port used for communicating with the snapshot controller HTTP endpoint.
190+
SnapshotControllerHTTPPort int
185191
}
186192

187193
// NodeKillerConfig describes configuration of NodeKiller -- a utility to
@@ -315,6 +321,9 @@ func RegisterCommonFlags(flags *flag.FlagSet) {
315321
flags.StringVar(&TestContext.ProgressReportURL, "progress-report-url", "", "The URL to POST progress updates to as the suite runs to assist in aiding integrations. If empty, no messages sent.")
316322
flags.StringVar(&TestContext.SpecSummaryOutput, "spec-dump", "", "The file to dump all ginkgo.SpecSummary to after tests run. If empty, no objects are saved/printed.")
317323
flags.StringVar(&TestContext.DockerConfigFile, "docker-config-file", "", "A file that contains credentials which can be used to pull images from certain private registries, needed for a test.")
324+
325+
flags.StringVar(&TestContext.SnapshotControllerPodName, "snapshot-controller-pod-name", "", "The pod name to use for identifying the snapshot controller in the kube-system namespace.")
326+
flags.IntVar(&TestContext.SnapshotControllerHTTPPort, "snapshot-controller-http-port", 0, "The port to use for snapshot controller HTTP communication.")
318327
}
319328

320329
// RegisterClusterFlags registers flags specific to the cluster e2e test suite.

test/e2e/framework/timeouts.go

Lines changed: 31 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -20,19 +20,20 @@ import "time"
2020

2121
const (
2222
// Default timeouts to be used in TimeoutContext
23-
podStartTimeout = 5 * time.Minute
24-
podStartShortTimeout = 2 * time.Minute
25-
podStartSlowTimeout = 15 * time.Minute
26-
podDeleteTimeout = 5 * time.Minute
27-
claimProvisionTimeout = 5 * time.Minute
28-
claimProvisionShortTimeout = 1 * time.Minute
29-
claimBoundTimeout = 3 * time.Minute
30-
pvReclaimTimeout = 3 * time.Minute
31-
pvBoundTimeout = 3 * time.Minute
32-
pvDeleteTimeout = 3 * time.Minute
33-
pvDeleteSlowTimeout = 20 * time.Minute
34-
snapshotCreateTimeout = 5 * time.Minute
35-
snapshotDeleteTimeout = 5 * time.Minute
23+
podStartTimeout = 5 * time.Minute
24+
podStartShortTimeout = 2 * time.Minute
25+
podStartSlowTimeout = 15 * time.Minute
26+
podDeleteTimeout = 5 * time.Minute
27+
claimProvisionTimeout = 5 * time.Minute
28+
claimProvisionShortTimeout = 1 * time.Minute
29+
claimBoundTimeout = 3 * time.Minute
30+
pvReclaimTimeout = 3 * time.Minute
31+
pvBoundTimeout = 3 * time.Minute
32+
pvDeleteTimeout = 3 * time.Minute
33+
pvDeleteSlowTimeout = 20 * time.Minute
34+
snapshotCreateTimeout = 5 * time.Minute
35+
snapshotDeleteTimeout = 5 * time.Minute
36+
snapshotControllerMetricsTimeout = 5 * time.Minute
3637
)
3738

3839
// TimeoutContext contains timeout settings for several actions.
@@ -77,23 +78,27 @@ type TimeoutContext struct {
7778

7879
// SnapshotDelete is how long for snapshot to delete snapshotContent.
7980
SnapshotDelete time.Duration
81+
82+
// SnapshotControllerMetrics is how long to wait for snapshot controller metrics.
83+
SnapshotControllerMetrics time.Duration
8084
}
8185

8286
// NewTimeoutContextWithDefaults returns a TimeoutContext with default values.
8387
func NewTimeoutContextWithDefaults() *TimeoutContext {
8488
return &TimeoutContext{
85-
PodStart: podStartTimeout,
86-
PodStartShort: podStartShortTimeout,
87-
PodStartSlow: podStartSlowTimeout,
88-
PodDelete: podDeleteTimeout,
89-
ClaimProvision: claimProvisionTimeout,
90-
ClaimProvisionShort: claimProvisionShortTimeout,
91-
ClaimBound: claimBoundTimeout,
92-
PVReclaim: pvReclaimTimeout,
93-
PVBound: pvBoundTimeout,
94-
PVDelete: pvDeleteTimeout,
95-
PVDeleteSlow: pvDeleteSlowTimeout,
96-
SnapshotCreate: snapshotCreateTimeout,
97-
SnapshotDelete: snapshotDeleteTimeout,
89+
PodStart: podStartTimeout,
90+
PodStartShort: podStartShortTimeout,
91+
PodStartSlow: podStartSlowTimeout,
92+
PodDelete: podDeleteTimeout,
93+
ClaimProvision: claimProvisionTimeout,
94+
ClaimProvisionShort: claimProvisionShortTimeout,
95+
ClaimBound: claimBoundTimeout,
96+
PVReclaim: pvReclaimTimeout,
97+
PVBound: pvBoundTimeout,
98+
PVDelete: pvDeleteTimeout,
99+
PVDeleteSlow: pvDeleteSlowTimeout,
100+
SnapshotCreate: snapshotCreateTimeout,
101+
SnapshotDelete: snapshotDeleteTimeout,
102+
SnapshotControllerMetrics: snapshotControllerMetricsTimeout,
98103
}
99104
}

test/e2e/instrumentation/monitoring/metrics_grabber.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ var _ = instrumentation.SIGDescribe("MetricsGrabber", func() {
5151
}
5252
}
5353
gomega.Eventually(func() error {
54-
grabber, err = e2emetrics.NewMetricsGrabber(c, ec, true, true, true, true, true)
54+
grabber, err = e2emetrics.NewMetricsGrabber(c, ec, true, true, true, true, true, true)
5555
if err != nil {
5656
return fmt.Errorf("failed to create metrics grabber: %v", err)
5757
}

0 commit comments

Comments
 (0)