Skip to content

kubelet: invoke part of image verification policy earlier #133114

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 57 additions & 35 deletions pkg/kubelet/images/image_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,41 +178,15 @@ func (m *imageManager) EnsureImageExists(ctx context.Context, objRef *v1.ObjectR
return "", message, err
}

if imageRef != "" && !utilfeature.DefaultFeatureGate.Enabled(features.KubeletEnsureSecretPulledImages) {
msg := fmt.Sprintf("Container image %q already present on machine", requestedImage)
m.logIt(objRef, v1.EventTypeNormal, events.PulledImage, logPrefix, msg, klog.Info)
return imageRef, msg, nil
}

repoToPull, _, _, err := parsers.ParseImageName(spec.Image)
if err != nil {
return "", err.Error(), err
}
// wrap the lookup in a function to ensure that we only look up credentials once and no earlier than needed
lookupPullCredentials := m.makeLookupPullCredentialsFunc(spec.Image, pod, pullSecrets, podSandboxConfig)

// construct the dynamic keyring using the providers we have in the kubelet
var podName, podNamespace, podUID string
if utilfeature.DefaultFeatureGate.Enabled(features.KubeletServiceAccountTokenForCredentialProviders) {
sandboxMetadata := podSandboxConfig.GetMetadata()

podName = sandboxMetadata.Name
podNamespace = sandboxMetadata.Namespace
podUID = sandboxMetadata.Uid
}

externalCredentialProviderKeyring := credentialproviderplugin.NewExternalCredentialProviderDockerKeyring(
podNamespace,
podName,
podUID,
pod.Spec.ServiceAccountName)

keyring, err := credentialprovidersecrets.MakeDockerKeyring(pullSecrets, credentialprovider.UnionDockerKeyring{m.nodeKeyring, externalCredentialProviderKeyring})
if err != nil {
return "", err.Error(), err
}

pullCredentials, _ := keyring.Lookup(repoToPull)
getPodCredentials := func() ([]kubeletconfiginternal.ImagePullSecret, *kubeletconfiginternal.ImagePullServiceAccount, error) {
pullCredentials, err := lookupPullCredentials()
if err != nil {
return nil, nil, err
}

if imageRef != "" {
var imagePullSecrets []kubeletconfiginternal.ImagePullSecret
// we don't take the audience of the service account into account, so there can only
// be one imagePullServiceAccount per pod when we try to make a decision.
Expand All @@ -239,8 +213,18 @@ func (m *imageManager) EnsureImageExists(ctx context.Context, objRef *v1.ObjectR
}
}

pullRequired := m.imagePullManager.MustAttemptImagePull(requestedImage, imageRef, imagePullSecrets, imagePullServiceAccount)
if !pullRequired {
return imagePullSecrets, imagePullServiceAccount, nil
}

if imageRef != "" {
if !utilfeature.DefaultFeatureGate.Enabled(features.KubeletEnsureSecretPulledImages) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

keeping this code block where it was before will simlpify the review. Took me some time to understand what happened here

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is what the code originally looked like before #133079. I'd rather it stayed here.

msg := fmt.Sprintf("Container image %q already present on machine", requestedImage)
m.logIt(objRef, v1.EventTypeNormal, events.PulledImage, logPrefix, msg, klog.Info)
return imageRef, msg, nil
}
if pullRequired, err := m.imagePullManager.MustAttemptImagePull(requestedImage, imageRef, getPodCredentials); err != nil {
return "", err.Error(), err
} else if !pullRequired {
msg := fmt.Sprintf("Container image %q already present on machine and can be accessed by the pod", requestedImage)
m.logIt(objRef, v1.EventTypeNormal, events.PulledImage, logPrefix, msg, klog.Info)
return imageRef, msg, nil
Expand All @@ -254,9 +238,47 @@ func (m *imageManager) EnsureImageExists(ctx context.Context, objRef *v1.ObjectR
return "", msg, err
}

pullCredentials, err := lookupPullCredentials()
if err != nil {
return "", err.Error(), err
}

return m.pullImage(ctx, logPrefix, objRef, pod.UID, requestedImage, spec, pullCredentials, podSandboxConfig)
}

func (m *imageManager) makeLookupPullCredentialsFunc(image string, pod *v1.Pod, pullSecrets []v1.Secret, podSandboxConfig *runtimeapi.PodSandboxConfig) func() ([]credentialprovider.TrackedAuthConfig, error) {
return sync.OnceValues(func() ([]credentialprovider.TrackedAuthConfig, error) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do we need to wrap it in OnceValues? It is only called once anyways as far as I can tell

Copy link
Member

@stlaz stlaz Jul 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the returned lookup function might be called twice in some cases:

  1. JIT during the MustAttemptImagePull()
  2. just before calling m.pullImage()

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

my question was mostly because I do not know exact behavior of sync.OnceValues. Will it retry if the first attempt resulted in err? Or we want to cache the error. So I thought it is easier just not have it =).

I will need to review the sequence you suggested. I plan to take it again later during 1.35, please ping if you think it is critical to do earleir

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will it retry if the first attempt resulted in err?

No. It caches both values.

Or we want to cache the error

An error in that place shows that something is seriously misconfigured, so it is unlikely that retrying makes sense. A mere failure to lookup credentials from the keyring does not return an error.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A mere failure to lookup credentials from the keyring does not return an error.

Not because of this PR, I mean. The relevant method's signature does not include error.

// pkg/credentialprovider/keyring.go
type DockerKeyring interface {
	Lookup(image string) ([]TrackedAuthConfig, bool)
}

I plan to take it again later during 1.35, please ping if you think it is critical to do earleir

I think this change should be in place when KubeletEnsureSecretPulledImages feature gate goes GA and default to enabled, because at that point it will become enabled by some managed (cloud) Kubernetes providers with no option to disable.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An error in that place shows that something is seriously misconfigured, so it is unlikely that retrying makes sense. A mere failure to lookup credentials from the keyring does not return an error.

Besides, I would like to make sure the result of the credential lookup matches what the authz check received in the context of a single EnsureImageExists() call.

I will need to review the sequence you suggested. I plan to take it again later during 1.35, please ping if you think it is critical to do earlier

This likely prevents us from moving the feature to Beta. It's not critical right now, but let's please make sure we pay attention to this PR early in 1.35.

repoToPull, _, _, err := parsers.ParseImageName(image)
if err != nil {
return nil, err
}

// construct the dynamic keyring using the providers we have in the kubelet
var podName, podNamespace, podUID string
if utilfeature.DefaultFeatureGate.Enabled(features.KubeletServiceAccountTokenForCredentialProviders) {
sandboxMetadata := podSandboxConfig.GetMetadata()

podName = sandboxMetadata.Name
podNamespace = sandboxMetadata.Namespace
podUID = sandboxMetadata.Uid
}

externalCredentialProviderKeyring := credentialproviderplugin.NewExternalCredentialProviderDockerKeyring(
podNamespace,
podName,
podUID,
pod.Spec.ServiceAccountName)

keyring, err := credentialprovidersecrets.MakeDockerKeyring(pullSecrets, credentialprovider.UnionDockerKeyring{m.nodeKeyring, externalCredentialProviderKeyring})
if err != nil {
return nil, err
}

pullCredentials, _ := keyring.Lookup(repoToPull)
return pullCredentials, nil
})
}

func (m *imageManager) pullImage(ctx context.Context, logPrefix string, objRef *v1.ObjectReference, podUID types.UID, image string, imgSpec kubecontainer.ImageSpec, pullCredentials []credentialprovider.TrackedAuthConfig, podSandboxConfig *runtimeapi.PodSandboxConfig) (imageRef, message string, err error) {
var pullSucceeded bool
var finalPullCredentials *credentialprovider.TrackedAuthConfig
Expand Down
27 changes: 19 additions & 8 deletions pkg/kubelet/images/image_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -694,17 +694,22 @@ type mockImagePullManager struct {
config *mockImagePullManagerConfig
}

func (m *mockImagePullManager) MustAttemptImagePull(image, _ string, podSecrets []kubeletconfiginternal.ImagePullSecret, podServiceAccount *kubeletconfiginternal.ImagePullServiceAccount) bool {
func (m *mockImagePullManager) MustAttemptImagePull(image, _ string, getPodCredentials pullmanager.GetPodCredentials) (bool, error) {
if m.config == nil || m.config.allowAll {
return false
return false, nil
}

podSecrets, podServiceAccount, err := getPodCredentials()
if err != nil {
return true, err
}

// Check secrets
if allowedSecrets, ok := m.config.allowedSecrets[image]; ok {
for _, s := range podSecrets {
for _, allowed := range allowedSecrets {
if s.Namespace == allowed.Namespace && s.Name == allowed.Name && s.UID == allowed.UID {
return false
return false, nil
}
}
}
Expand All @@ -714,12 +719,12 @@ func (m *mockImagePullManager) MustAttemptImagePull(image, _ string, podSecrets
if podServiceAccount != nil {
if allowedServiceAccounts, ok := m.config.allowedServiceAccounts[image]; ok {
if slices.Contains(allowedServiceAccounts, *podServiceAccount) {
return false
return false, nil
}
}
}

return true
return true, nil
}

// mockImagePullManagerWithTracking tracks calls to MustAttemptImagePull for service account testing
Expand All @@ -736,19 +741,25 @@ type mockImagePullManagerWithTracking struct {
recordedCredentials *kubeletconfiginternal.ImagePullCredentials
}

func (m *mockImagePullManagerWithTracking) MustAttemptImagePull(image, imageRef string, podSecrets []kubeletconfiginternal.ImagePullSecret, podServiceAccount *kubeletconfiginternal.ImagePullServiceAccount) bool {
func (m *mockImagePullManagerWithTracking) MustAttemptImagePull(image, imageRef string, getPodCredentials pullmanager.GetPodCredentials) (bool, error) {
m.mustAttemptCalled = true
m.lastImage = image
m.lastImageRef = imageRef

podSecrets, podServiceAccount, err := getPodCredentials()
if err != nil {
return true, err
}

m.lastSecrets = podSecrets
if podServiceAccount != nil {
m.lastServiceAccounts = []kubeletconfiginternal.ImagePullServiceAccount{*podServiceAccount}
}

if m.allowAll {
return false
return false, nil
}
return m.mustAttemptReturn
return m.mustAttemptReturn, nil
}

func (m *mockImagePullManagerWithTracking) RecordImagePulled(image, imageRef string, credentials *kubeletconfiginternal.ImagePullCredentials) {
Expand Down
8 changes: 5 additions & 3 deletions pkg/kubelet/images/pullmanager/benchmarks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,11 @@ func directRecordReadFunc(expectHit bool) benchmarkedCheckFunc {

func mustAttemptPullReadFunc(expectHit bool) benchmarkedCheckFunc {
return func(b *testing.B, pullManager PullManager, imgRef string) {
mustPull := pullManager.MustAttemptImagePull("test.repo/org/"+imgRef, imgRef, nil, nil)
if mustPull != !expectHit {
b.Fatalf("MustAttemptImagePull() expected %t, got %t", !expectHit, mustPull)
mustPull, err := pullManager.MustAttemptImagePull("test.repo/org/"+imgRef, imgRef, func() ([]kubeletconfig.ImagePullSecret, *kubeletconfig.ImagePullServiceAccount, error) {
return nil, nil, nil
})
if mustPull != !expectHit || err != nil {
b.Fatalf("no error expected (got %v); MustAttemptImagePull() expected %t, got %t", err, !expectHit, mustPull)
}
}
}
Expand Down
31 changes: 18 additions & 13 deletions pkg/kubelet/images/pullmanager/image_pull_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,9 +179,9 @@ func (f *PullManager) decrementImagePullIntent(image string) {
f.decrementIntentCounterForImage(image)
}

func (f *PullManager) MustAttemptImagePull(image, imageRef string, podSecrets []kubeletconfiginternal.ImagePullSecret, podServiceAccount *kubeletconfiginternal.ImagePullServiceAccount) bool {
func (f *PullManager) MustAttemptImagePull(image, imageRef string, getPodCredentials GetPodCredentials) (bool, error) {
if len(imageRef) == 0 {
return true
return true, nil
}

var imagePulledByKubelet bool
Expand Down Expand Up @@ -225,36 +225,41 @@ func (f *PullManager) MustAttemptImagePull(image, imageRef string, podSecrets []

if err != nil {
klog.ErrorS(err, "Unable to access cache records about image pulls")
return true
return true, nil
}

if !f.imagePolicyEnforcer.RequireCredentialVerificationForImage(image, imagePulledByKubelet) {
return false
return false, nil
}

if pulledRecord == nil {
// we have no proper records of the image being pulled in the past, we can short-circuit here
return true
return true, nil
}

sanitizedImage, err := trimImageTagDigest(image)
if err != nil {
klog.ErrorS(err, "failed to parse image name, forcing image credentials reverification", "image", sanitizedImage)
return true
return true, nil
}

cachedCreds, ok := pulledRecord.CredentialMapping[sanitizedImage]
if !ok {
return true
return true, nil
}

if cachedCreds.NodePodsAccessible {
// anyone on this node can access the image
return false
return false, nil
}

if len(cachedCreds.KubernetesSecrets) == 0 && len(cachedCreds.KubernetesServiceAccounts) == 0 {
return true
return true, nil
}

podSecrets, podServiceAccount, err := getPodCredentials()
if err != nil {
return true, err
}

for _, podSecret := range podSecrets {
Expand All @@ -275,7 +280,7 @@ func (f *PullManager) MustAttemptImagePull(image, imageRef string, podSecrets []
klog.ErrorS(err, "failed to write an image pulled record", "image", image, "imageRef", imageRef)
}
}
return false
return false, nil
}

if secretCoordinatesMatch {
Expand All @@ -286,18 +291,18 @@ func (f *PullManager) MustAttemptImagePull(image, imageRef string, podSecrets []
if err := f.writePulledRecordIfChanged(image, imageRef, &kubeletconfiginternal.ImagePullCredentials{KubernetesSecrets: []kubeletconfiginternal.ImagePullSecret{podSecret}}); err != nil {
klog.ErrorS(err, "failed to write an image pulled record", "image", image, "imageRef", imageRef)
}
return false
return false, nil
}
}
}
}

if podServiceAccount != nil && slices.Contains(cachedCreds.KubernetesServiceAccounts, *podServiceAccount) {
// we found a matching service account, no need to pull the image
return false
return false, nil
}

return true
return true, nil
}

func (f *PullManager) PruneUnknownRecords(imageList []string, until time.Time) {
Expand Down
Loading