Skip to content

Commit fb39754

Browse files
committed
Separate backOff policy for static pod
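Most static pods run critical components, so when one of them fails and has to be restarted, it should come back as quickly as possible. This commit therefore gives static pods (pods whose config source annotation is "file") their own backoff policy, with a 3-second initial delay capped at 10 seconds, instead of sharing the regular container restart backoff.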
1 parent 1740d85 commit fb39754

File tree

3 files changed

+14
-1
lines changed

3 files changed

+14
-1
lines changed

pkg/kubelet/kubelet.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -873,6 +873,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
873873
volumepathhandler.NewBlockVolumePathHandler())
874874

875875
klet.backOff = flowcontrol.NewBackOff(backOffPeriod, MaxContainerBackOff)
876+
klet.staticBackOff = flowcontrol.NewBackOff(time.Second*3, time.Second*10)
876877

877878
// setup eviction manager
878879
evictionManager, evictionAdmitHandler := eviction.NewManager(klet.resourceAnalyzer, evictionConfig,
@@ -1259,6 +1260,8 @@ type Kubelet struct {
12591260

12601261
// Container restart Backoff
12611262
backOff *flowcontrol.Backoff
1263+
// Container restart Backoff used for static pods (config source annotation "file") instead of backOff
1264+
staticBackOff *flowcontrol.Backoff
12621265

12631266
// Information about the ports which are opened by daemons on Node running this Kubelet server.
12641267
daemonEndpoints *v1.NodeDaemonEndpoints
@@ -1940,7 +1943,14 @@ func (kl *Kubelet) SyncPod(ctx context.Context, updateType kubetypes.SyncPodType
19401943
// Use WithoutCancel instead of a new context.TODO() to propagate trace context
19411944
// Call the container runtime's SyncPod callback
19421945
sctx := context.WithoutCancel(ctx)
1943-
result := kl.containerRuntime.SyncPod(sctx, pod, podStatus, pullSecrets, kl.backOff)
1946+
backOff := kl.backOff
1947+
if pod.Annotations != nil && pod.Annotations[kubetypes.ConfigSourceAnnotationKey] == "file" {
1948+
klog.V(5).InfoS("use static backOff for pod. ", "pod", klog.KObj(pod))
1949+
backOff = kl.staticBackOff
1950+
} else {
1951+
klog.V(5).InfoS("use normal backOff for pod. ", "pod", klog.KObj(pod))
1952+
}
1953+
result := kl.containerRuntime.SyncPod(sctx, pod, podStatus, pullSecrets, backOff)
19441954
kl.reasonCache.Update(pod.UID, result)
19451955
if err := result.Error(); err != nil {
19461956
// Do not return error if the only failures were pods in backoff

pkg/kubelet/kubelet_pods.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1370,6 +1370,7 @@ func (kl *Kubelet) HandlePodCleanups(ctx context.Context) error {
13701370

13711371
// Cleanup any backoff entries.
13721372
kl.backOff.GC()
1373+
kl.staticBackOff.GC()
13731374
return nil
13741375
}
13751376

pkg/kubelet/kubelet_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,9 @@ func newTestKubeletWithImageList(
330330

331331
fakeClock := testingclock.NewFakeClock(time.Now())
332332
kubelet.backOff = flowcontrol.NewBackOff(time.Second, time.Minute)
333+
kubelet.staticBackOff = flowcontrol.NewBackOff(time.Second, time.Minute)
333334
kubelet.backOff.Clock = fakeClock
335+
kubelet.staticBackOff.Clock = fakeClock
334336
kubelet.resyncInterval = 10 * time.Second
335337
kubelet.workQueue = queue.NewBasicWorkQueue(fakeClock)
336338
// Relist period does not affect the tests.

0 commit comments

Comments
 (0)