Skip to content

Commit d961af4

Browse files
committed
fixed bug such that implicit extended resource name can always be used,
no matter the explicit extendedResourceName field in device class is set or not.
1 parent 7bbcfda commit d961af4

File tree

8 files changed

+135
-25
lines changed

8 files changed

+135
-25
lines changed

pkg/kubelet/cm/dra/manager.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ import (
3939

4040
drahealthv1alpha1 "k8s.io/kubelet/pkg/apis/dra-health/v1alpha1"
4141
drapb "k8s.io/kubelet/pkg/apis/dra/v1"
42-
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
4342
kubefeatures "k8s.io/kubernetes/pkg/features"
4443
draplugin "k8s.io/kubernetes/pkg/kubelet/cm/dra/plugin"
4544
"k8s.io/kubernetes/pkg/kubelet/cm/dra/state"
@@ -48,6 +47,7 @@ import (
4847
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
4948
kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
5049
"k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache"
50+
"k8s.io/kubernetes/pkg/scheduler/util"
5151
)
5252

5353
// draManagerStateFileName is the file name where dra manager stores its state
@@ -526,7 +526,7 @@ func (m *Manager) GetResources(pod *v1.Pod, container *v1.Container) (*Container
526526
// We only care about the resources requested by the pod
527527
continue
528528
}
529-
if v1helper.IsExtendedResourceName(rName) {
529+
if util.IsDRAExtendedResourceName(rName) {
530530
requestName := ""
531531
for _, rm := range pod.Status.ExtendedResourceClaimStatus.RequestMappings {
532532
if rm.ContainerName == container.Name && rm.ResourceName == rName.String() {

pkg/scheduler/framework/plugins/dynamicresources/dynamicresources.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ import (
4747
"k8s.io/dynamic-resource-allocation/structured"
4848
"k8s.io/klog/v2"
4949
fwk "k8s.io/kube-scheduler/framework"
50-
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
5150
"k8s.io/kubernetes/pkg/scheduler/apis/config"
5251
"k8s.io/kubernetes/pkg/scheduler/apis/config/validation"
5352
"k8s.io/kubernetes/pkg/scheduler/framework"
@@ -430,7 +429,7 @@ func hasDeviceClassMappedExtendedResource(reqs v1.ResourceList, deviceClassMappi
430429
// We only care about the resources requested by the pod we are trying to schedule.
431430
continue
432431
}
433-
if v1helper.IsExtendedResourceName(rName) {
432+
if schedutil.IsDRAExtendedResourceName(rName) {
434433
_, ok := deviceClassMapping[rName]
435434
if ok {
436435
return true
@@ -761,7 +760,7 @@ func (pl *DynamicResources) filterExtendedResources(state *stateData, pod *v1.Po
761760
extendedResources := make(map[v1.ResourceName]int64)
762761
hasExtendedResource := false
763762
for rName, rQuant := range state.draExtendedResource.podScalarResources {
764-
if !v1helper.IsExtendedResourceName(rName) {
763+
if !schedutil.IsDRAExtendedResourceName(rName) {
765764
continue
766765
}
767766
// Skip in case request quantity is zero

pkg/scheduler/framework/plugins/dynamicresources/dynamicresources_test.go

Lines changed: 40 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -58,21 +58,22 @@ import (
5858
var (
5959
podKind = v1.SchemeGroupVersion.WithKind("Pod")
6060

61-
nodeName = "worker"
62-
node2Name = "worker-2"
63-
node3Name = "worker-3"
64-
driver = "some-driver"
65-
driver2 = "some-driver-2"
66-
podName = "my-pod"
67-
podUID = "1234"
68-
resourceName = "my-resource"
69-
resourceName2 = resourceName + "-2"
70-
claimName = podName + "-" + resourceName
71-
claimName2 = podName + "-" + resourceName2
72-
className = "my-resource-class"
73-
namespace = "default"
74-
attrName = resourceapi.QualifiedName("healthy") // device attribute only available on non-default node
75-
extendedResourceName = "example.com/gpu"
61+
nodeName = "worker"
62+
node2Name = "worker-2"
63+
node3Name = "worker-3"
64+
driver = "some-driver"
65+
driver2 = "some-driver-2"
66+
podName = "my-pod"
67+
podUID = "1234"
68+
resourceName = "my-resource"
69+
resourceName2 = resourceName + "-2"
70+
claimName = podName + "-" + resourceName
71+
claimName2 = podName + "-" + resourceName2
72+
className = "my-resource-class"
73+
namespace = "default"
74+
attrName = resourceapi.QualifiedName("healthy") // device attribute only available on non-default node
75+
extendedResourceName = "example.com/gpu"
76+
implicitExtendedResourceName = "deviceclass.resource.kubernetes.io/my-resource-class"
7677

7778
deviceClass = &resourceapi.DeviceClass{
7879
ObjectMeta: metav1.ObjectMeta{
@@ -122,6 +123,12 @@ var (
122123
v1.ResourceName(extendedResourceName): "1",
123124
}).
124125
Obj()
126+
podWithImplicitExtendedResourceName = st.MakePod().Name(podName).Namespace(namespace).
127+
UID(podUID).
128+
Req(map[v1.ResourceName]string{
129+
v1.ResourceName(implicitExtendedResourceName): "1",
130+
}).
131+
Obj()
125132

126133
// Node with "instance-1" device and no device attributes.
127134
workerNode = &st.MakeNode().Name(nodeName).Label("kubernetes.io/hostname", nodeName).Node
@@ -1303,6 +1310,24 @@ func TestPlugin(t *testing.T) {
13031310
},
13041311
},
13051312
},
1313+
"implicit-extended-resource-name-with-resources": {
1314+
enableDRAExtendedResource: true,
1315+
pod: podWithImplicitExtendedResourceName,
1316+
classes: []*resourceapi.DeviceClass{deviceClass},
1317+
objs: []apiruntime.Object{workerNodeSlice, podWithImplicitExtendedResourceName},
1318+
want: want{
1319+
reserve: result{
1320+
inFlightClaim: extendedResourceClaimNoName,
1321+
},
1322+
prebind: result{
1323+
assumedClaim: reserve(extendedResourceClaim, podWithImplicitExtendedResourceName),
1324+
added: []metav1.Object{reserve(extendedResourceClaim, podWithImplicitExtendedResourceName)},
1325+
},
1326+
postbind: result{
1327+
assumedClaim: reserve(extendedResourceClaim, podWithImplicitExtendedResourceName),
1328+
},
1329+
},
1330+
},
13061331
"extended-resource-name-with-resources-has-claim": {
13071332
enableDRAExtendedResource: true,
13081333
pod: podWithExtendedResourceName,

pkg/scheduler/framework/plugins/dynamicresources/extended/util.go

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ package extended
1818

1919
import (
2020
v1 "k8s.io/api/core/v1"
21-
"k8s.io/api/resource/v1beta1"
21+
resourceapi "k8s.io/api/resource/v1"
2222
"k8s.io/kubernetes/pkg/scheduler/framework"
2323
)
2424

@@ -29,11 +29,10 @@ func DeviceClassMapping(draManager framework.SharedDRAManager) (map[v1.ResourceN
2929
return nil, err
3030
}
3131
for _, c := range classes {
32-
if c.Spec.ExtendedResourceName == nil {
33-
extendedResources[v1.ResourceName(v1beta1.ResourceDeviceClassPrefix+c.Name)] = c.Name
34-
} else {
32+
if c.Spec.ExtendedResourceName != nil {
3533
extendedResources[v1.ResourceName(*c.Spec.ExtendedResourceName)] = c.Name
3634
}
35+
extendedResources[v1.ResourceName(resourceapi.ResourceDeviceClassPrefix+c.Name)] = c.Name
3736
}
3837
return extendedResources, nil
3938
}

pkg/scheduler/framework/plugins/noderesources/fit.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ func withDeviceClass(result *preFilterState, draManager framework.SharedDRAManag
241241
continue
242242
}
243243

244-
if v1helper.IsExtendedResourceName(rName) {
244+
if schedutil.IsDRAExtendedResourceName(rName) {
245245
hasExtendedResource = true
246246
break
247247
}

pkg/scheduler/framework/plugins/noderesources/fit_test.go

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1908,3 +1908,63 @@ func TestHaveAnyRequestedResourcesIncreased(t *testing.T) {
19081908
})
19091909
}
19101910
}
1911+
func TestWithDeviceClass(t *testing.T) {
1912+
logger, ctx := ktesting.NewTestContext(t)
1913+
ctx, cancel := context.WithCancel(ctx)
1914+
defer cancel()
1915+
1916+
client := fake.NewSimpleClientset(deviceClassWithExtendResourceName)
1917+
informerFactory := informers.NewSharedInformerFactory(client, 0)
1918+
draManager := dynamicresources.NewDRAManager(ctx, assumecache.NewAssumeCache(logger, informerFactory.Resource().V1().ResourceClaims().Informer(), "resource claim", "", nil), nil, informerFactory)
1919+
informerFactory.Start(ctx.Done())
1920+
t.Cleanup(func() {
1921+
// Now we can wait for all goroutines to stop.
1922+
informerFactory.Shutdown()
1923+
})
1924+
informerFactory.WaitForCacheSync(ctx.Done())
1925+
1926+
testCases := map[string]struct {
1927+
state *preFilterState
1928+
expectedResourceToDeviceClass map[v1.ResourceName]string
1929+
}{
1930+
1931+
"regular extended resource name": {
1932+
state: &preFilterState{
1933+
Resource: framework.Resource{
1934+
ScalarResources: map[v1.ResourceName]int64{extendedResourceA: 1},
1935+
},
1936+
},
1937+
expectedResourceToDeviceClass: map[v1.ResourceName]string{
1938+
v1.ResourceName("deviceclass.resource.kubernetes.io/device-class-name"): deviceClassName,
1939+
v1.ResourceName("extended.resource.dra.io/something"): deviceClassName,
1940+
},
1941+
},
1942+
"implicit extended resource name": {
1943+
state: &preFilterState{
1944+
Resource: framework.Resource{
1945+
ScalarResources: map[v1.ResourceName]int64{v1.ResourceName("deviceclass.resource.kubernetes.io/" + deviceClassName): 1},
1946+
},
1947+
},
1948+
expectedResourceToDeviceClass: map[v1.ResourceName]string{
1949+
v1.ResourceName("deviceclass.resource.kubernetes.io/device-class-name"): deviceClassName,
1950+
v1.ResourceName("extended.resource.dra.io/something"): deviceClassName,
1951+
},
1952+
},
1953+
"no extended resource name": {
1954+
state: &preFilterState{
1955+
Resource: framework.Resource{
1956+
MilliCPU: 1000,
1957+
},
1958+
},
1959+
expectedResourceToDeviceClass: nil,
1960+
},
1961+
}
1962+
for name, tc := range testCases {
1963+
t.Run(name, func(t *testing.T) {
1964+
withDeviceClass(tc.state, draManager)
1965+
if diff := cmp.Diff(tc.state.resourceToDeviceClass, tc.expectedResourceToDeviceClass); diff != "" {
1966+
t.Error("resourceToDeviceClass doesn't match (-expected +actual):\n", diff)
1967+
}
1968+
})
1969+
}
1970+
}

pkg/scheduler/util/utils.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,11 @@ import (
2020
"context"
2121
"encoding/json"
2222
"fmt"
23+
"strings"
2324
"time"
2425

2526
v1 "k8s.io/api/core/v1"
27+
resourceapi "k8s.io/api/resource/v1"
2628
apierrors "k8s.io/apimachinery/pkg/api/errors"
2729
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2830
"k8s.io/apimachinery/pkg/types"
@@ -142,6 +144,12 @@ func IsScalarResourceName(name v1.ResourceName) bool {
142144
v1helper.IsPrefixedNativeResource(name) || v1helper.IsAttachableVolumeResourceName(name)
143145
}
144146

147+
// IsDRAExtendedResourceName returns true when name is an extended resource name, or an implicit extended resource name
148+
// derived from device class name with the format of deviceclass.resource.kubernetes.io/<device class name>
149+
func IsDRAExtendedResourceName(name v1.ResourceName) bool {
150+
return v1helper.IsExtendedResourceName(name) || strings.HasPrefix(string(name), resourceapi.ResourceDeviceClassPrefix)
151+
}
152+
145153
// As converts two objects to the given type.
146154
// Both objects must be of the same type. If not, an error is returned.
147155
// nil objects are allowed and will be converted to nil.

test/e2e/dra/dra.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1910,6 +1910,25 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
19101910
b := drautils.NewBuilder(f, driver)
19111911
b.UseExtendedResourceName = true
19121912

1913+
ginkgo.It("must run a pod with both implicit and explicit extended resource with one container two resources", func(ctx context.Context) {
1914+
pod := b.Pod()
1915+
res := v1.ResourceList{}
1916+
// drautils.ExtendedResourceName(0) is added to the deivce class with name: b.ClassName()+"0"
1917+
res[v1.ResourceName("deviceclass.resource.kubernetes.io/"+b.ClassName()+"0")] = resource.MustParse("1")
1918+
res[v1.ResourceName(drautils.ExtendedResourceName(0))] = resource.MustParse("1")
1919+
pod.Spec.Containers[0].Resources.Requests = res
1920+
pod.Spec.Containers[0].Resources.Limits = res
1921+
1922+
b.Create(ctx, pod)
1923+
err := e2epod.WaitForPodRunningInNamespace(ctx, f.ClientSet, pod)
1924+
framework.ExpectNoError(err, "start pod")
1925+
containerEnv := []string{
1926+
"container_0_request_0", "true",
1927+
"container_0_request_1", "true",
1928+
}
1929+
drautils.TestContainerEnv(ctx, f, pod, pod.Spec.Containers[0].Name, false, containerEnv...)
1930+
})
1931+
19131932
ginkgo.It("must run a pod with extended resource with one container one resource", func(ctx context.Context) {
19141933
pod := b.Pod()
19151934
res := v1.ResourceList{}

0 commit comments

Comments
 (0)