diff --git a/README.md b/README.md index 12d4186e..dd262dcf 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,11 @@ To enable LoadAwareScorer, the following env vars must be configured: export ENABLE_LOAD_AWARE_SCORER=true export LOAD_AWARE_SCORER_WEIGHT=1.0 ``` + +To enable PDFilter, the following env var must be configured: +``` +export ENABLE_PD_FILTER=true +``` --- [Inference Gateways]:#concepts-and-definitions @@ -96,8 +101,8 @@ See our website at https://gateway-api-inference-extension.sigs.k8s.io/ for deta ## Roadmap As Inference Gateway builds towards a GA release. We will continue to expand our capabilities, namely: -1. Prefix-cache aware load balancing with interfaces for remote caches -1. Recommended LoRA adapter pipeline for automated rollout +1. Prefix-cache aware load balancing with interfaces for remote caches +1. Recommended LoRA adapter pipeline for automated rollout 1. Fairness and priority between workloads within the same criticality band 1. HPA support for autoscaling on aggregate metrics derived from the load balancer 1. Support for large multi-modal inputs and outputs @@ -121,4 +126,3 @@ Contributions are readily welcomed, follow the [dev guide](./docs/dev.md) to sta ### Code of conduct Participation in the Kubernetes community is governed by the [Kubernetes Code of Conduct](code-of-conduct.md). 
- diff --git a/pkg/epp/scheduling/local_config.go b/pkg/epp/scheduling/local_config.go index 2e261a87..fe4d0b3b 100644 --- a/pkg/epp/scheduling/local_config.go +++ b/pkg/epp/scheduling/local_config.go @@ -18,7 +18,9 @@ package scheduling import ( "context" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/filter" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/picker" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/scorer" envutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/env" @@ -28,6 +30,7 @@ import ( const ( kvCacheScorerEnablementEnvVar = "ENABLE_KVCACHE_AWARE_SCORER" loadAwareScorerEnablementEnvVar = "ENABLE_LOAD_AWARE_SCORER" + pdFilterEnablementEnvVar = "ENABLE_PD_FILTER" kvCacheScorerWeightEnvVar = "KVCACHE_AWARE_SCORER_WEIGHT" loadAwareScorerWeightEnvVar = "LOAD_AWARE_SCORER_WEIGHT" @@ -38,6 +41,7 @@ func setDefaultConfig() { // this configuration is a temporary state, it should be better streamlined. setLoadAwareScorer() setKVCacheAwareScorer() + setPDFilter() defaultConfig.picker = picker.NewMaxScorePicker() } @@ -75,3 +79,20 @@ func setKVCacheAwareScorer() { defaultConfig.scorers[kvCacheScorer] = kvCacheScorerWeight loggerDebug.Info("Initialized KVCacheAwareScorer", "weight", kvCacheScorerWeight) } + +// setPDFilter appends filter.PDFilter to defaultConfig.filters when +// envutil.GetEnvString yields exactly "true" for ENABLE_PD_FILTER ("false" is +// the fallback passed in); otherwise it only logs that the filter was skipped. +func setPDFilter() { + ctx := context.Background() + loggerDebug := log.FromContext(ctx).WithName("scheduler_config").V(logutil.DEBUG) + + if envutil.GetEnvString(pdFilterEnablementEnvVar, "false", loggerDebug) != "true" { + loggerDebug.Info("Skipping PDFilter creation as it is not enabled") + return + } + + defaultConfig.filters = append(defaultConfig.filters, filter.PDFilter) + // Mirror setKVCacheAwareScorer, which logs once its plugin is registered. + loggerDebug.Info("Initialized PDFilter") +}