Skip to content

Add kubelet_pod_emptydir_volume_{used,size_limit}_bytes Prometheus metrics #121489

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pkg/kubelet/kubelet.go
Original file line number Diff line number Diff line change
Expand Up @@ -1497,6 +1497,7 @@ func (kl *Kubelet) initializeModules() error {
metrics.Register(
collectors.NewVolumeStatsCollector(kl),
collectors.NewLogMetricsCollector(kl.StatsProvider.ListPodStats),
collectors.NewEmptyDirMetricsCollector(kl),
)
metrics.SetNodeName(kl.nodeName)
servermetrics.Register()
Expand Down
143 changes: 143 additions & 0 deletions pkg/kubelet/metrics/collectors/emptydir_metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
/*
Copyright 2024 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package collectors

import (
"context"

v1 "k8s.io/api/core/v1"
"k8s.io/component-base/metrics"
"k8s.io/klog/v2"

kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
serverstats "k8s.io/kubernetes/pkg/kubelet/server/stats"
)

// Metric descriptors for the two gauges sent by emptyDirMetricsCollector.
// Both use the same variable labels, in the same order: volume_name,
// namespace, pod. Label values passed when a metric is built must follow
// that order.
var (
// emptyDirUsedBytesDesc describes the gauge reporting bytes used by an
// emptyDir volume.
emptyDirUsedBytesDesc = metrics.NewDesc(
metrics.BuildFQName(
"", // empty metric namespace
kubeletmetrics.KubeletSubsystem,
kubeletmetrics.EmptyDirUsedBytesKey,
),
"Bytes used by the emptyDir volume. Only volumes on the default medium are considered.",
[]string{
"volume_name",
"namespace",
"pod",
},
Comment on lines +38 to +42
Copy link
Member

@dgrisonnet dgrisonnet Aug 28, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The cardinility should be approximately O(ephemeralStorage). Since we have two metrics, it should be two times that amount.

There was some discussions in the past in #69507 that suggested that in practice this should be reasonnable, but still we should be careful since this is unbounded and the churn could be big considering the ephemeral nature of these volumes.

@jsafrane do you perhaps know if there are real world scenarios where hundred thousands of ephemeral storage are created and these new metrics could blow up monitoring platforms?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Note that https://github.com/kubernetes/kubernetes/blob/bbd83d86444d7b325a51f4daa0d65163b795b70e/pkg/kubelet/metrics/collectors/volume_stats.go for PVC related metrics could be considered as a precedent, particularly in the context of local persistent storage.)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If really only EmptyDir with StorageMediumDefault are reported, then I would expect that nr. of such volumes is smaller than nr. of Pods. And we reported bunch of per-pod metrics last time I checked.

nil, // no constant labels
metrics.ALPHA, // stability level
"", // deprecated version: empty, i.e. not deprecated
)
// emptyDirSizeLimitBytesDesc describes the gauge reporting the configured
// size limit of an emptyDir volume; per its help text it is only present
// when a limit is set.
emptyDirSizeLimitBytesDesc = metrics.NewDesc(
metrics.BuildFQName(
"", // empty metric namespace
kubeletmetrics.KubeletSubsystem,
kubeletmetrics.EmptyDirSizeLimitBytesKey,
),
"Size limit of the emptyDir volume in bytes, if set. Only volumes on the default medium are considered.",
[]string{
"volume_name",
"namespace",
"pod",
},
nil, // no constant labels
metrics.ALPHA, // stability level
"", // deprecated version: empty, i.e. not deprecated
)
)

// emptyDirMetricsCollector is a metrics collector that reports per-volume
// emptyDir metrics. It obtains both pod stats and pod specs through
// statsProvider.
type emptyDirMetricsCollector struct {
	metrics.BaseStableCollector

	statsProvider serverstats.Provider
}

// Compile-time assertion that *emptyDirMetricsCollector satisfies
// metrics.StableCollector.
var _ metrics.StableCollector = (*emptyDirMetricsCollector)(nil)

// NewEmptyDirMetricsCollector returns a metrics.StableCollector that exposes
// metrics about pods' emptyDir volumes, reading its data from statsProvider.
func NewEmptyDirMetricsCollector(statsProvider serverstats.Provider) metrics.StableCollector {
	collector := new(emptyDirMetricsCollector)
	collector.statsProvider = statsProvider
	return collector
}

// DescribeWithStability implements the metrics.StableCollector interface by
// sending the descriptor of every metric this collector can emit: first the
// used-bytes descriptor, then the size-limit descriptor.
func (c *emptyDirMetricsCollector) DescribeWithStability(ch chan<- *metrics.Desc) {
	descs := [...]*metrics.Desc{
		emptyDirUsedBytesDesc,
		emptyDirSizeLimitBytesDesc,
	}
	for _, desc := range descs {
		ch <- desc
	}
}

// CollectWithStability implements the metrics.StableCollector interface.
//
// It lists pod stats from the stats provider and, for every volume stat that
// matches a default-medium emptyDir volume in the pod's spec, sends:
//   - a used-bytes gauge, when the stat carries UsedBytes;
//   - a size-limit gauge, when the volume spec sets SizeLimit.
//
// If listing pod stats fails, the error is logged and nothing is sent. Pods
// with nil VolumeStats, and pods the provider cannot find by name, are
// skipped with a verbosity-5 log line.
func (c *emptyDirMetricsCollector) CollectWithStability(ch chan<- metrics.Metric) {
podStats, err := c.statsProvider.ListPodStats(context.Background())
if err != nil {
klog.ErrorS(err, "Failed to get pod stats")
return
}

for _, podStat := range podStats {
podName := podStat.PodRef.Name
podNamespace := podStat.PodRef.Namespace

if podStat.VolumeStats == nil {
klog.V(5).InfoS("Pod has no volume stats", "pod", podName, "namespace", podNamespace)
continue
}

// The stats only carry volume names; the pod spec is needed to learn each
// volume's source, medium and size limit.
pod, found := c.statsProvider.GetPodByName(podNamespace, podName)
if !found {
klog.V(5).InfoS("Couldn't get pod", "pod", podName, "namespace", podNamespace)
continue
}

// Index the spec's volumes by name so each volume stat can be matched to
// its declaration.
podVolumes := make(map[string]v1.Volume, len(pod.Spec.Volumes))
for _, volume := range pod.Spec.Volumes {
podVolumes[volume.Name] = volume
}

for _, volumeStat := range podStat.VolumeStats {
// Stats whose name has no matching volume in the pod spec are ignored.
if volume, found := podVolumes[volumeStat.Name]; found {
// Only consider volumes on the default medium.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see there is already a VolumeStatsUsedBytesKey for Persistent volumes. Are emptyDir volumes the only volumes that are missing metrics today?

Copy link
Contributor Author

@machine424 machine424 Jan 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not the only ones, many other volumes don't have metrics.
I suggested in #121489 (comment) to deprecate the the existing per source metrics and have a generic metrics (with the sources as metrics), that will be easier to discover and to extend, but that effort will need more effort from maintainers.

if volume.EmptyDir != nil && volume.EmptyDir.Medium == v1.StorageMediumDefault {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be useful for medium to be a label? It seems like memory-backed empty-dir usage and limits would also be useful, since we already have computed that.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

// A nil UsedBytes produces no used-bytes sample for this volume.
if volumeStat.UsedBytes != nil {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we expose sth if nil?
NaN or sth. Or log sth?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not too familiar with what that would mean in practice, but wouldn't "0" be suited here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it could be nil temporarily (e.g., while the Pod is being created). I expect it will eventually not be nil. We can probably leave it as is, similar to how it's handled in log_metrics.go

if c.Logs != nil && c.Logs.UsedBytes != nil {
(we can always revisit them later)

// Label values follow the descriptor's label order:
// volume_name, namespace, pod.
ch <- metrics.NewLazyConstMetric(
emptyDirUsedBytesDesc,
metrics.GaugeValue,
float64(*volumeStat.UsedBytes),
volumeStat.Name,
podNamespace,
podName,
)
}
// The size limit is read from the pod spec rather than from the stats,
// and this check is independent of the UsedBytes check above, so a
// limit is reported even when no used-bytes sample was sent.
if volume.EmptyDir.SizeLimit != nil {
ch <- metrics.NewLazyConstMetric(
emptyDirSizeLimitBytesDesc,
metrics.GaugeValue,
volume.EmptyDir.SizeLimit.AsApproximateFloat64(),
volumeStat.Name,
podNamespace,
podName,
)
}
}
}

}
}
}
158 changes: 158 additions & 0 deletions pkg/kubelet/metrics/collectors/emptydir_metrics_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
/*
Copyright 2024 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package collectors

import (
"context"
"strings"
"testing"

"k8s.io/apimachinery/pkg/api/resource"

v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/component-base/metrics/testutil"
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
statstest "k8s.io/kubernetes/pkg/kubelet/server/stats/testing"
)

// TestEmptyDirCollector checks that the collector only reports emptyDir
// volumes on the default medium:
//   - "foo-emptydir-1" has a size limit, so both gauges are expected;
//   - "foo-emptydir-2" has no size limit, so only the used-bytes gauge is expected;
//   - "foo-memory-emptydir" (memory medium) and "foo-configmap" (not an
//     emptyDir) must not be reported;
//   - pod "bar" is listed without any volume stats and must not be reported.
func TestEmptyDirCollector(t *testing.T) {
	testNamespace := "test-namespace"
	existingPodNameWithStats := "foo"
	podNameWithoutStats := "bar"

	podStats := []statsapi.PodStats{
		{
			PodRef: statsapi.PodReference{
				Name:      existingPodNameWithStats,
				Namespace: testNamespace,
				UID:       "UID_foo",
			},
			StartTime: metav1.Now(),
			VolumeStats: []statsapi.VolumeStats{
				{
					Name: "foo-emptydir-1",
					FsStats: statsapi.FsStats{
						UsedBytes: newUint64Pointer(2101248),
					},
				},
				{
					Name: "foo-emptydir-2",
					FsStats: statsapi.FsStats{
						UsedBytes: newUint64Pointer(6488064),
					},
				},
				{
					Name: "foo-memory-emptydir",
					FsStats: statsapi.FsStats{
						UsedBytes: newUint64Pointer(25362432),
					},
				},
				{
					Name: "foo-configmap",
					FsStats: statsapi.FsStats{
						UsedBytes: newUint64Pointer(4096),
					},
				},
			},
		},
		{
			// VolumeStats is deliberately left nil so the collector's
			// "pod has no volume stats" path is exercised. Without this
			// entry the podWithoutStats fixture below is never reached.
			PodRef: statsapi.PodReference{
				Name:      podNameWithoutStats,
				Namespace: testNamespace,
				UID:       "UID_bar",
			},
			StartTime: metav1.Now(),
		},
	}

	existingPod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:      existingPodNameWithStats,
			Namespace: testNamespace,
		},
		Spec: v1.PodSpec{
			Volumes: []v1.Volume{
				{
					Name: "foo-emptydir-1",
					VolumeSource: v1.VolumeSource{
						EmptyDir: &v1.EmptyDirVolumeSource{SizeLimit: resource.NewQuantity(3000100, resource.BinarySI)},
					},
				},
				{
					Name: "foo-emptydir-2",
					VolumeSource: v1.VolumeSource{
						EmptyDir: &v1.EmptyDirVolumeSource{},
					},
				},
				{
					Name: "foo-memory-emptydir",
					VolumeSource: v1.VolumeSource{
						EmptyDir: &v1.EmptyDirVolumeSource{Medium: v1.StorageMediumMemory},
					},
				},
				{
					Name: "foo-configmap",
					VolumeSource: v1.VolumeSource{
						ConfigMap: &v1.ConfigMapVolumeSource{},
					},
				},
			},
		},
	}

	podWithoutStats := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:      podNameWithoutStats,
			Namespace: testNamespace,
		},
		Spec: v1.PodSpec{
			Volumes: []v1.Volume{
				{
					Name: "bar-emptydir",
					VolumeSource: v1.VolumeSource{
						EmptyDir: &v1.EmptyDirVolumeSource{},
					},
				},
			},
		},
	}

	mockStatsProvider := statstest.NewMockProvider(t)

	// All expectations are optional (Maybe): the test asserts on the exposed
	// metrics, not on which provider calls were made.
	mockStatsProvider.EXPECT().ListPodStats(context.Background()).Return(podStats, nil).Maybe()
	mockStatsProvider.EXPECT().
		GetPodByName(testNamespace, existingPodNameWithStats).
		Return(existingPod, true).
		Maybe()
	mockStatsProvider.EXPECT().
		GetPodByName(testNamespace, podNameWithoutStats).
		Return(podWithoutStats, true).
		Maybe()

	err := testutil.CustomCollectAndCompare(
		&emptyDirMetricsCollector{statsProvider: mockStatsProvider},
		strings.NewReader(`
		# HELP kubelet_pod_emptydir_volume_size_limit_bytes [ALPHA] Size limit of the emptyDir volume in bytes, if set. Only volumes on the default medium are considered.
		# TYPE kubelet_pod_emptydir_volume_size_limit_bytes gauge
		kubelet_pod_emptydir_volume_size_limit_bytes{namespace="test-namespace",pod="foo",volume_name="foo-emptydir-1"} 3.0001e+06
		# HELP kubelet_pod_emptydir_volume_used_bytes [ALPHA] Bytes used by the emptyDir volume. Only volumes on the default medium are considered.
		# TYPE kubelet_pod_emptydir_volume_used_bytes gauge
		kubelet_pod_emptydir_volume_used_bytes{namespace="test-namespace",pod="foo",volume_name="foo-emptydir-1"} 2.101248e+06
		kubelet_pod_emptydir_volume_used_bytes{namespace="test-namespace",pod="foo",volume_name="foo-emptydir-2"} 6.488064e+06
		`),
		"kubelet_pod_emptydir_volume_size_limit_bytes",
		"kubelet_pod_emptydir_volume_used_bytes",
	)
	if err != nil {
		t.Fatal(err)
	}
}
2 changes: 2 additions & 0 deletions pkg/kubelet/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ const (
PreemptionsKey = "preemptions"
VolumeStatsCapacityBytesKey = "volume_stats_capacity_bytes"
VolumeStatsAvailableBytesKey = "volume_stats_available_bytes"
EmptyDirUsedBytesKey = "pod_emptydir_volume_used_bytes"
EmptyDirSizeLimitBytesKey = "pod_emptydir_volume_size_limit_bytes"
VolumeStatsUsedBytesKey = "volume_stats_used_bytes"
VolumeStatsInodesKey = "volume_stats_inodes"
VolumeStatsInodesFreeKey = "volume_stats_inodes_free"
Expand Down