From e2f398a0402365c1222d32d51767b293eb31cae2 Mon Sep 17 00:00:00 2001
From: Lionel Villard
Date: Thu, 1 May 2025 13:50:41 -0400
Subject: [PATCH 1/2] Provide a way to enable the PDFilter

---
 pkg/epp/scheduling/local_config.go | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/pkg/epp/scheduling/local_config.go b/pkg/epp/scheduling/local_config.go
index 2e261a87a..fe4d0b3b7 100644
--- a/pkg/epp/scheduling/local_config.go
+++ b/pkg/epp/scheduling/local_config.go
@@ -18,7 +18,9 @@ package scheduling
 
 import (
 	"context"
+
 	"sigs.k8s.io/controller-runtime/pkg/log"
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/filter"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/picker"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/scorer"
 	envutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/env"
@@ -28,6 +30,7 @@ import (
 const (
 	kvCacheScorerEnablementEnvVar   = "ENABLE_KVCACHE_AWARE_SCORER"
 	loadAwareScorerEnablementEnvVar = "ENABLE_LOAD_AWARE_SCORER"
+	pdFilterEnablementEnvVar        = "ENABLE_PD_FILTER"
 
 	kvCacheScorerWeightEnvVar   = "KVCACHE_AWARE_SCORER_WEIGHT"
 	loadAwareScorerWeightEnvVar = "LOAD_AWARE_SCORER_WEIGHT"
@@ -38,6 +41,7 @@ func setDefaultConfig() {
 	// this configuration is a temporary state, it should be better streamlined.
 	setLoadAwareScorer()
 	setKVCacheAwareScorer()
+	setPDFilter()
 
 	defaultConfig.picker = picker.NewMaxScorePicker()
 }
@@ -75,3 +79,19 @@ func setKVCacheAwareScorer() {
 	defaultConfig.scorers[kvCacheScorer] = kvCacheScorerWeight
 	loggerDebug.Info("Initialized KVCacheAwareScorer", "weight", kvCacheScorerWeight)
 }
+
+// setPDFilter appends filter.PDFilter to defaultConfig.filters when the
+// ENABLE_PD_FILTER environment variable reads exactly "true". Any other value
+// (the fallback passed to envutil.GetEnvString is "false") skips the filter.
+func setPDFilter() {
+	ctx := context.Background()
+	loggerDebug := log.FromContext(ctx).WithName("scheduler_config").V(logutil.DEBUG)
+
+	if envutil.GetEnvString(pdFilterEnablementEnvVar, "false", loggerDebug) != "true" {
+		loggerDebug.Info("Skipping PDFilter creation as it is not enabled")
+		return
+	}
+
+	defaultConfig.filters = append(defaultConfig.filters, filter.PDFilter)
+	loggerDebug.Info("Initialized PDFilter")
+}

From 01c043e461dccf44193c2bc1dd861c960fdabcef Mon Sep 17 00:00:00 2001
From: Lionel Villard
Date: Thu, 1 May 2025 14:08:01 -0400
Subject: [PATCH 2/2] update readme

---
 README.md | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 12d4186ee..dd262dcfc 100644
--- a/README.md
+++ b/README.md
@@ -22,6 +22,11 @@ To enable LoadAwareScorer, the following env vars must be configured:
 export ENABLE_LOAD_AWARE_SCORER=true
 export LOAD_AWARE_SCORER_WEIGHT=1.0
 ```
+
+To enable PDFilter, the following env var must be configured:
+```
+export ENABLE_PD_FILTER=true
+```
 ---
 [Inference Gateways]:#concepts-and-definitions
 
@@ -96,8 +101,8 @@ See our website at https://gateway-api-inference-extension.sigs.k8s.io/ for deta
 ## Roadmap
 
 As Inference Gateway builds towards a GA release. We will continue to expand our capabilities, namely:
-1. Prefix-cache aware load balancing with interfaces for remote caches 
-1. Recommended LoRA adapter pipeline for automated rollout 
+1. Prefix-cache aware load balancing with interfaces for remote caches
+1. Recommended LoRA adapter pipeline for automated rollout
 1. Fairness and priority between workloads within the same criticality band
 1. HPA support for autoscaling on aggregate metrics derived from the load balancer
 1. Support for large multi-modal inputs and outputs
@@ -121,4 +126,3 @@ Contributions are readily welcomed, follow the [dev guide](./docs/dev.md) to sta
 ### Code of conduct
 
 Participation in the Kubernetes community is governed by the [Kubernetes Code of Conduct](code-of-conduct.md).
-