-
Notifications
You must be signed in to change notification settings - Fork 41.1k
Add kubelet_pod_emptydir_volume_{used,size_limit}_bytes Prometheus metrics #121489
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
@@ -0,0 +1,143 @@ | ||||
/* | ||||
Copyright 2024 The Kubernetes Authors. | ||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); | ||||
you may not use this file except in compliance with the License. | ||||
You may obtain a copy of the License at | ||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0 | ||||
|
||||
Unless required by applicable law or agreed to in writing, software | ||||
distributed under the License is distributed on an "AS IS" BASIS, | ||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
See the License for the specific language governing permissions and | ||||
limitations under the License. | ||||
*/ | ||||
|
||||
package collectors | ||||
|
||||
import ( | ||||
"context" | ||||
|
||||
v1 "k8s.io/api/core/v1" | ||||
"k8s.io/component-base/metrics" | ||||
"k8s.io/klog/v2" | ||||
|
||||
kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics" | ||||
serverstats "k8s.io/kubernetes/pkg/kubelet/server/stats" | ||||
) | ||||
|
||||
var ( | ||||
emptyDirUsedBytesDesc = metrics.NewDesc( | ||||
metrics.BuildFQName( | ||||
"", | ||||
kubeletmetrics.KubeletSubsystem, | ||||
kubeletmetrics.EmptyDirUsedBytesKey, | ||||
), | ||||
"Bytes used by the emptyDir volume. Only volumes on the default medium are considered.", | ||||
[]string{ | ||||
"volume_name", | ||||
"namespace", | ||||
"pod", | ||||
}, | ||||
nil, | ||||
metrics.ALPHA, | ||||
"", | ||||
) | ||||
emptyDirSizeLimitBytesDesc = metrics.NewDesc( | ||||
metrics.BuildFQName( | ||||
"", | ||||
kubeletmetrics.KubeletSubsystem, | ||||
kubeletmetrics.EmptyDirSizeLimitBytesKey, | ||||
), | ||||
"Size limit of the emptyDir volume in bytes, if set. Only volumes on the default medium are considered.", | ||||
[]string{ | ||||
"volume_name", | ||||
"namespace", | ||||
"pod", | ||||
}, | ||||
nil, | ||||
metrics.ALPHA, | ||||
"", | ||||
) | ||||
) | ||||
|
||||
type emptyDirMetricsCollector struct { | ||||
metrics.BaseStableCollector | ||||
|
||||
statsProvider serverstats.Provider | ||||
} | ||||
|
||||
// Check if emptyDirMetricsCollector implements necessary interface | ||||
var _ metrics.StableCollector = &emptyDirMetricsCollector{} | ||||
|
||||
// NewEmptyDirMetricsCollector implements the metrics.StableCollector interface and | ||||
// exposes metrics about pod's emptyDir. | ||||
func NewEmptyDirMetricsCollector(statsProvider serverstats.Provider) metrics.StableCollector { | ||||
machine424 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
return &emptyDirMetricsCollector{statsProvider: statsProvider} | ||||
} | ||||
|
||||
// DescribeWithStability implements the metrics.StableCollector interface. | ||||
func (c *emptyDirMetricsCollector) DescribeWithStability(ch chan<- *metrics.Desc) { | ||||
ch <- emptyDirUsedBytesDesc | ||||
ch <- emptyDirSizeLimitBytesDesc | ||||
} | ||||
|
||||
// CollectWithStability implements the metrics.StableCollector interface.
//
// It lists pod stats from the stats provider and, for every volume stat whose
// name matches an emptyDir volume on the default medium in the pod's spec,
// sends a used-bytes gauge (when UsedBytes is set) and a size-limit gauge
// (when the volume declares a SizeLimit). Pods whose VolumeStats is nil, pods
// the provider cannot find by name, and all other volumes produce no metrics.
func (c *emptyDirMetricsCollector) CollectWithStability(ch chan<- metrics.Metric) { | ||||
podStats, err := c.statsProvider.ListPodStats(context.Background()) | ||||
if err != nil { | ||||
// Without pod stats nothing can be reported; log and emit no metrics.
klog.ErrorS(err, "Failed to get pod stats") | ||||
return | ||||
} | ||||
|
||||
for _, podStat := range podStats { | ||||
podName := podStat.PodRef.Name | ||||
podNamespace := podStat.PodRef.Namespace | ||||
|
||||
if podStat.VolumeStats == nil { | ||||
klog.V(5).InfoS("Pod has no volume stats", "pod", podName, "namespace", podNamespace) | ||||
continue | ||||
} | ||||
|
||||
// The stats only carry volume names; the pod object is needed to learn
// each volume's source and its size limit.
pod, found := c.statsProvider.GetPodByName(podNamespace, podName) | ||||
if !found { | ||||
klog.V(5).InfoS("Couldn't get pod", "pod", podName, "namespace", podNamespace) | ||||
continue | ||||
} | ||||
|
||||
// Index the pod's volumes by name so each volume stat can be matched to
// its spec entry with a single map lookup.
podVolumes := make(map[string]v1.Volume, len(pod.Spec.Volumes)) | ||||
for _, volume := range pod.Spec.Volumes { | ||||
podVolumes[volume.Name] = volume | ||||
} | ||||
|
||||
for _, volumeStat := range podStat.VolumeStats { | ||||
if volume, found := podVolumes[volumeStat.Name]; found { | ||||
// Only consider volumes on the default medium. | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see there is already a VolumeStatsUsedBytesKey for Persistent volumes. Are emptyDir volumes the only volumes that are missing metrics today? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not the only ones, many other volumes don't have metrics. |
||||
if volume.EmptyDir != nil && volume.EmptyDir.Medium == v1.StorageMediumDefault { | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it be useful for medium to be a label? It seems like memory-backed empty-dir usage and limits would also be useful, since we already have computed that. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||||
// UsedBytes is a pointer; no used-bytes sample is sent when it is nil.
if volumeStat.UsedBytes != nil { | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we expose sth if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not too familiar with what that would mean in practice, but wouldn't "0" be suited here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe it could be
|
||||
ch <- metrics.NewLazyConstMetric( | ||||
emptyDirUsedBytesDesc, | ||||
metrics.GaugeValue, | ||||
float64(*volumeStat.UsedBytes), | ||||
volumeStat.Name, | ||||
podNamespace, | ||||
podName, | ||||
) | ||||
} | ||||
// The size limit is optional in the volume spec; it is reported only when set.
if volume.EmptyDir.SizeLimit != nil { | ||||
ch <- metrics.NewLazyConstMetric( | ||||
emptyDirSizeLimitBytesDesc, | ||||
metrics.GaugeValue, | ||||
volume.EmptyDir.SizeLimit.AsApproximateFloat64(), | ||||
volumeStat.Name, | ||||
podNamespace, | ||||
podName, | ||||
) | ||||
} | ||||
} | ||||
} | ||||
|
||||
} | ||||
} | ||||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
/* | ||
Copyright 2024 The Kubernetes Authors. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package collectors | ||
|
||
import ( | ||
"context" | ||
"strings" | ||
"testing" | ||
|
||
"k8s.io/apimachinery/pkg/api/resource" | ||
|
||
v1 "k8s.io/api/core/v1" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/component-base/metrics/testutil" | ||
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1" | ||
statstest "k8s.io/kubernetes/pkg/kubelet/server/stats/testing" | ||
) | ||
|
||
func TestEmptyDirCollector(t *testing.T) { | ||
|
||
testNamespace := "test-namespace" | ||
existingPodNameWithStats := "foo" | ||
podNameWithoutStats := "bar" | ||
|
||
podStats := []statsapi.PodStats{ | ||
{ | ||
PodRef: statsapi.PodReference{ | ||
Name: existingPodNameWithStats, | ||
Namespace: testNamespace, | ||
UID: "UID_foo", | ||
}, | ||
StartTime: metav1.Now(), | ||
VolumeStats: []statsapi.VolumeStats{ | ||
{ | ||
Name: "foo-emptydir-1", | ||
FsStats: statsapi.FsStats{ | ||
UsedBytes: newUint64Pointer(2101248), | ||
}, | ||
}, | ||
{ | ||
Name: "foo-emptydir-2", | ||
FsStats: statsapi.FsStats{ | ||
UsedBytes: newUint64Pointer(6488064), | ||
}, | ||
}, | ||
{ | ||
Name: "foo-memory-emptydir", | ||
FsStats: statsapi.FsStats{ | ||
UsedBytes: newUint64Pointer(25362432), | ||
}, | ||
}, | ||
{ | ||
Name: "foo-configmap", | ||
FsStats: statsapi.FsStats{ | ||
UsedBytes: newUint64Pointer(4096), | ||
}, | ||
}, | ||
}, | ||
}, | ||
} | ||
|
||
existingPod := &v1.Pod{ | ||
ObjectMeta: metav1.ObjectMeta{ | ||
Name: existingPodNameWithStats, | ||
Namespace: testNamespace, | ||
}, | ||
Spec: v1.PodSpec{ | ||
Volumes: []v1.Volume{ | ||
{ | ||
Name: "foo-emptydir-1", | ||
VolumeSource: v1.VolumeSource{ | ||
EmptyDir: &v1.EmptyDirVolumeSource{SizeLimit: resource.NewQuantity(3000100, resource.BinarySI)}, | ||
}, | ||
}, | ||
{ | ||
Name: "foo-emptydir-2", | ||
VolumeSource: v1.VolumeSource{ | ||
EmptyDir: &v1.EmptyDirVolumeSource{}, | ||
}, | ||
}, | ||
{ | ||
Name: "foo-memory-emptydir", | ||
VolumeSource: v1.VolumeSource{ | ||
EmptyDir: &v1.EmptyDirVolumeSource{Medium: v1.StorageMediumMemory}, | ||
}, | ||
}, | ||
{ | ||
Name: "foo-configmap", | ||
VolumeSource: v1.VolumeSource{ | ||
ConfigMap: &v1.ConfigMapVolumeSource{}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
} | ||
|
||
podWithoutStats := &v1.Pod{ | ||
ObjectMeta: metav1.ObjectMeta{ | ||
Name: podNameWithoutStats, | ||
Namespace: testNamespace, | ||
}, | ||
Spec: v1.PodSpec{ | ||
Volumes: []v1.Volume{ | ||
{ | ||
Name: "bar-emptydir", | ||
VolumeSource: v1.VolumeSource{ | ||
EmptyDir: &v1.EmptyDirVolumeSource{}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
} | ||
|
||
mockStatsProvider := statstest.NewMockProvider(t) | ||
|
||
mockStatsProvider.EXPECT().ListPodStats(context.Background()).Return(podStats, nil).Maybe() | ||
mockStatsProvider.EXPECT(). | ||
GetPodByName(testNamespace, existingPodNameWithStats). | ||
Return(existingPod, true). | ||
Maybe() | ||
mockStatsProvider.EXPECT(). | ||
GetPodByName(testNamespace, podNameWithoutStats). | ||
Return(podWithoutStats, true). | ||
Maybe() | ||
|
||
err := testutil.CustomCollectAndCompare( | ||
&emptyDirMetricsCollector{statsProvider: mockStatsProvider}, | ||
strings.NewReader(` | ||
# HELP kubelet_pod_emptydir_volume_size_limit_bytes [ALPHA] Size limit of the emptyDir volume in bytes, if set. Only volumes on the default medium are considered. | ||
# TYPE kubelet_pod_emptydir_volume_size_limit_bytes gauge | ||
kubelet_pod_emptydir_volume_size_limit_bytes{namespace="test-namespace",pod="foo",volume_name="foo-emptydir-1"} 3.0001e+06 | ||
# HELP kubelet_pod_emptydir_volume_used_bytes [ALPHA] Bytes used by the emptyDir volume. Only volumes on the default medium are considered. | ||
# TYPE kubelet_pod_emptydir_volume_used_bytes gauge | ||
kubelet_pod_emptydir_volume_used_bytes{namespace="test-namespace",pod="foo",volume_name="foo-emptydir-1"} 2.101248e+06 | ||
kubelet_pod_emptydir_volume_used_bytes{namespace="test-namespace",pod="foo",volume_name="foo-emptydir-2"} 6.488064e+06 | ||
`), | ||
"kubelet_pod_emptydir_volume_size_limit_bytes", | ||
"kubelet_pod_emptydir_volume_used_bytes", | ||
) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
|
||
} |
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The cardinality should be approximately O(ephemeralStorage). Since we have two metrics, it should be two times that amount.
There were some discussions in the past in #69507 that suggested that in practice this should be reasonable, but we should still be careful since this is unbounded and the churn could be big considering the ephemeral nature of these volumes.
@jsafrane do you perhaps know if there are real-world scenarios where hundreds of thousands of ephemeral storage volumes are created and these new metrics could blow up monitoring platforms?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(Note that https://github.com/kubernetes/kubernetes/blob/bbd83d86444d7b325a51f4daa0d65163b795b70e/pkg/kubelet/metrics/collectors/volume_stats.go for PVC related metrics could be considered as a precedent, particularly in the context of local persistent storage.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If really only EmptyDir volumes with `StorageMediumDefault` are reported, then I would expect the number of such volumes to be smaller than the number of Pods. And we reported a bunch of per-pod metrics last time I checked.