Skip to content

Commit d2e35c7

Browse files
shibataka000 authored and
Joseph Burnett committed
Fix a bug that caused unintentional scale-out while a deployment is being updated.
During a rolling update with maxSurge=1 and maxUnavailable=0, len(metrics) is greater than currentReplicas, which may cause an unintentional scale-out.
1 parent d5dfb5c commit d2e35c7

File tree

2 files changed

+25
-1
lines changed

2 files changed

+25
-1
lines changed

pkg/controller/podautoscaler/replica_calculator.go

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -134,9 +134,15 @@ func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUti
134134
return currentReplicas, utilization, rawUtilization, timestamp, nil
135135
}
136136

137+
newReplicas := int32(math.Ceil(newUsageRatio * float64(len(metrics))))
138+
if (newUsageRatio < 1.0 && newReplicas > currentReplicas) || (newUsageRatio > 1.0 && newReplicas < currentReplicas) {
139+
// return the current replicas if the change of metrics length would cause a change in scale direction
140+
return currentReplicas, utilization, rawUtilization, timestamp, nil
141+
}
142+
137143
// return the result, where the number of replicas considered is
138144
// however many replicas factored into our calculation
139-
return int32(math.Ceil(newUsageRatio * float64(len(metrics)))), utilization, rawUtilization, timestamp, nil
145+
return newReplicas, utilization, rawUtilization, timestamp, nil
140146
}
141147

142148
// GetRawResourceReplicas calculates the desired replica count based on a target resource utilization (as a raw milli-value)

pkg/controller/podautoscaler/replica_calculator_test.go

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1247,6 +1247,24 @@ func TestReplicaCalcMissingMetricsUnreadyScaleDown(t *testing.T) {
12471247
tc.runTest(t)
12481248
}
12491249

1250+
func TestReplicaCalcDuringRollingUpdateWithMaxSurge(t *testing.T) {
1251+
tc := replicaCalcTestCase{
1252+
currentReplicas: 2,
1253+
expectedReplicas: 2,
1254+
podPhase: []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodRunning},
1255+
resource: &resourceInfo{
1256+
name: v1.ResourceCPU,
1257+
requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
1258+
levels: []int64{100, 100},
1259+
1260+
targetUtilization: 50,
1261+
expectedUtilization: 10,
1262+
expectedValue: numContainersPerPod * 100,
1263+
},
1264+
}
1265+
tc.runTest(t)
1266+
}
1267+
12501268
// TestComputedToleranceAlgImplementation is a regression test which
12511269
// back-calculates a minimal percentage for downscaling based on a small percentage
12521270
// increase in pod utilization which is calibrated against the tolerance value.

0 commit comments

Comments
 (0)