Skip to content

Commit 3680aa4

Browse files
author
Your Name
committed
fix: Add total image size to image GC failure message
1 parent 8d14da6 commit 3680aa4

File tree

2 files changed

+40
-8
lines changed

2 files changed

+40
-8
lines changed

pkg/kubelet/images/image_gc_manager.go

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,19 @@ type imageRecord struct {
187187
pinned bool
188188
}
189189

190+
// IsInUse returns true if the image was observed in a container by
191+
// imageDetect() at or after freeTime.
192+
func (rec *imageRecord) IsInUse(freeTime time.Time) bool {
193+
return rec.lastUsed.Equal(freeTime) || rec.lastUsed.After(freeTime)
194+
}
195+
196+
// IsTooNew returns true if the image was first detected more recently than the
197+
// minimum garbage collection age. In such a case, the image may have just
198+
// been pulled down, and will be used by a container right away.
199+
func (rec *imageRecord) IsTooNew(freeTime time.Time, minAge time.Duration) bool {
200+
return freeTime.Sub(rec.firstDetected) < minAge
201+
}
202+
190203
// NewImageGCManager instantiates a new ImageGCManager object.
191204
func NewImageGCManager(runtime container.Runtime, statsProvider StatsProvider, postGCHooks []PostImageGCHook, recorder record.EventRecorder, nodeRef *v1.ObjectReference, policy ImageGCPolicy, tracerProvider trace.TracerProvider) (ImageGCManager, error) {
192205
// Validate policy.
@@ -398,13 +411,24 @@ func (im *realImageGCManager) GarbageCollect(ctx context.Context, beganGC time.T
398411
im.runPostGCHooks(remainingImages, freeTime)
399412

400413
if freed < amountToFree {
414+
// Calculate total size of all images that cannot be garbage collected.
415+
// This should match the logic in freeSpace() but look at all present images,
416+
// not just the unused images returned by imagesInEvictionOrder().
417+
unreclaimableImageSize := int64(0)
418+
im.imageRecordsLock.Lock()
419+
for _, rec := range im.imageRecords {
420+
if rec.IsInUse(freeTime) || rec.IsTooNew(freeTime, im.policy.MinAge) {
421+
unreclaimableImageSize += rec.size
422+
}
423+
}
424+
im.imageRecordsLock.Unlock()
401425
// This usually means the disk is full for reasons other than container
402426
// images, such as logs, volumes, or other files. However, it could also
403427
// be due to an unusually large number or size of in-use container images.
404428
message := fmt.Sprintf("Insufficient free disk space on the node's image filesystem (%.1f%% of %s used). "+
405-
"Failed to free sufficient space by deleting unused images. "+
429+
"Failed to free sufficient space by deleting unused images (%s used for active images). "+
406430
"Consider resizing the disk or deleting unused files.",
407-
usagePercent, formatSize(capacity))
431+
usagePercent, formatSize(capacity), formatSize(unreclaimableImageSize))
408432
im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, events.FreeDiskSpaceFailed, "%s", message)
409433
return fmt.Errorf("%s", message)
410434
}
@@ -481,16 +505,13 @@ func (im *realImageGCManager) freeSpace(ctx context.Context, bytesToFree int64,
481505
var imagesLeft []string
482506
for _, image := range images {
483507
klog.V(5).InfoS("Evaluating image ID for possible garbage collection based on disk usage", "imageID", image.id, "runtimeHandler", image.imageRecord.runtimeHandlerUsedToPullImage)
484-
// Images that are currently in used were given a newer lastUsed.
485-
if image.lastUsed.Equal(freeTime) || image.lastUsed.After(freeTime) {
508+
if image.imageRecord.IsInUse(freeTime) {
486509
klog.V(5).InfoS("Image ID was used too recently, not eligible for garbage collection", "imageID", image.id, "lastUsed", image.lastUsed, "freeTime", freeTime)
487510
imagesLeft = append(imagesLeft, image.id)
488511
continue
489512
}
490513

491-
// Avoid garbage collect the image if the image is not old enough.
492-
// In such a case, the image may have just been pulled down, and will be used by a container right away.
493-
if freeTime.Sub(image.firstDetected) < im.policy.MinAge {
514+
if image.imageRecord.IsTooNew(freeTime, im.policy.MinAge) {
494515
klog.V(5).InfoS("Image ID's age is less than the policy's minAge, not eligible for garbage collection", "imageID", image.id, "age", freeTime.Sub(image.firstDetected), "minAge", im.policy.MinAge)
495516
imagesLeft = append(imagesLeft, image.id)
496517
continue

pkg/kubelet/images/image_gc_manager_test.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -700,16 +700,27 @@ func TestGarbageCollectNotEnoughFreed(t *testing.T) {
700700
// This image is unused and eligible for deletion.
701701
// Its size is less than the required amount to free.
702702
imageSize := int64(500 * 1024 * 1024) // 500 MiB
703+
// Add an in-use image and a container referencing it to ensure the unreclaimable image size reported in the event is non-zero.
704+
inUseImageSize := int64(700 * 1024 * 1024) // 700 MiB
703705
fakeRuntime.ImageList = []container.Image{
704706
makeImage(0, imageSize),
707+
makeImage(1, inUseImageSize),
708+
}
709+
// Set up a pod/container using image-1 so it is considered in-use
710+
fakeRuntime.AllPodList = []*containertest.FakePod{
711+
{Pod: &container.Pod{
712+
Containers: []*container.Container{
713+
makeContainer(1),
714+
},
715+
}},
705716
}
706717

707718
err := manager.GarbageCollect(ctx, time.Now())
708719
assert.Error(t, err)
709720

710721
// Check that a warning event was sent
711722
expectedEvent := "Warning FreeDiskSpaceFailed Insufficient free disk space on the node's image filesystem" +
712-
" (95.0% of 10.0 GiB used). Failed to free sufficient space by deleting unused images." +
723+
" (95.0% of 10.0 GiB used). Failed to free sufficient space by deleting unused images (700.0 MiB used for active images)." +
713724
" Consider resizing the disk or deleting unused files."
714725
select {
715726
case event := <-recorder.Events:

0 commit comments

Comments
 (0)