From 47e97de8338b8811c5b115726b944b72c69e2be3 Mon Sep 17 00:00:00 2001 From: Philipp Matthes Date: Mon, 16 Mar 2026 11:33:09 +0100 Subject: [PATCH 1/9] Initial implementation --- cmd/main.go | 9 + helm/bundles/cortex-nova/values.yaml | 1 + .../nova/hypervisor_overcommit_controller.go | 219 ++++++++++++++++++ pkg/conf/conf.go | 28 ++- 4 files changed, 251 insertions(+), 6 deletions(-) create mode 100644 internal/scheduling/nova/hypervisor_overcommit_controller.go diff --git a/cmd/main.go b/cmd/main.go index 43ae63f9c..46a244de1 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -369,6 +369,15 @@ func main() { os.Exit(1) } } + if slices.Contains(mainConfig.EnabledControllers, "hypervisor-overcommit-controller") { + hypervisorOvercommitController := &nova.HypervisorOvercommitController{} + hypervisorOvercommitController.Client = multiclusterClient + if err := hypervisorOvercommitController.SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", + "controller", "HypervisorOvercommitController") + os.Exit(1) + } + } if slices.Contains(mainConfig.EnabledControllers, "manila-decisions-pipeline-controller") { controller := &manila.FilterWeigherPipelineController{ Monitor: filterWeigherPipelineMonitor, diff --git a/helm/bundles/cortex-nova/values.yaml b/helm/bundles/cortex-nova/values.yaml index 200ba3ff3..a8ecf63f1 100644 --- a/helm/bundles/cortex-nova/values.yaml +++ b/helm/bundles/cortex-nova/values.yaml @@ -113,6 +113,7 @@ cortex-scheduling-controllers: enabledControllers: - nova-pipeline-controllers - nova-deschedulings-executor + - hypervisor-overcommit-controller - explanation-controller - reservations-controller enabledTasks: diff --git a/internal/scheduling/nova/hypervisor_overcommit_controller.go b/internal/scheduling/nova/hypervisor_overcommit_controller.go new file mode 100644 index 000000000..59ce7bf58 --- /dev/null +++ b/internal/scheduling/nova/hypervisor_overcommit_controller.go @@ -0,0 +1,219 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package nova + +import ( + "context" + "errors" + "fmt" + "maps" + "slices" + + "github.com/cobaltcore-dev/cortex/pkg/conf" + "github.com/cobaltcore-dev/cortex/pkg/multicluster" + hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/client-go/util/workqueue" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" +) + +// HypervisorOvercommitMapping maps hypervisor types to their desired +// overcommit ratios. This mapping will be loaded from a configmap +// that is mounted into the controller pod. +type HypervisorOvercommitMapping struct { + // Overcommit is the overcommit ratio to set for hypervisors by resource name. + // Values must be set to something >= 1.0, otherwise the controller will + // ignore them. + Overcommit map[hv1.ResourceName]float64 `json:"overcommit"` + + // Trait specifies a trait that a hypervisor may have, and that, if present, + // triggers the controller to set the overcommit ratio specified in Value + // for that hypervisor. + Trait *string `json:"trait,omitempty"` +} + +// Validate the provided HypervisorOvercommitMapping, returning an error if the +// mapping is invalid. +func (m *HypervisorOvercommitMapping) Validate() error { + for resource, overcommit := range m.Overcommit { + if overcommit < 1.0 { + return errors.New("Invalid overcommit ratio in config, must be >= 1.0. " + + "Invalid value for resource " + string(resource) + ": " + + fmt.Sprintf("%f", overcommit)) + } + } + return nil +} + +// HypervisorOvercommitConfig holds the configuration for the +// HypervisorOvercommitController and is loaded from a configmap that is mounted +// into the controller pod. +type HypervisorOvercommitConfig struct { + // OvercommitMappings is a list of mappings that map hypervisor traits to + // overcommit ratios. Note that this list is applied in order, so if there + // are multiple mappings applying to the same hypervisors, the last mapping + // in this list will override the previous ones. + OvercommitMappings []HypervisorOvercommitMapping `json:"overcommitMappings"` +} + +// Validate the provided HypervisorOvercommitConfig, returning an error if the +// config is invalid. +func (c *HypervisorOvercommitConfig) Validate() error { + // Check that all the individual mappings are valid. + for _, mapping := range c.OvercommitMappings { + if err := mapping.Validate(); err != nil { + return err + } + } + return nil +} + +// HypervisorOvercommitController is a controller that reconciles on the +// hypervisor crd and sets desired overcommit ratios based on the hypervisor +// type. +type HypervisorOvercommitController struct { + client.Client + + // config holds the configuration for the controller, which is loaded from a + // configmap that is mounted into the controller pod. + config HypervisorOvercommitConfig +} + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. +// +// For more details, check Reconcile and its Result here: +// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.20.2/pkg/reconcile +// +// For more details about the method shape, read up here: +// - https://ahmet.im/blog/controller-pitfalls/#reconcile-method-shape +func (c *HypervisorOvercommitController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := ctrl.LoggerFrom(ctx) + log.Info("Reconciling resource") + + obj := new(hv1.Hypervisor) + if err := c.Get(ctx, req.NamespacedName, obj); err != nil { + if apierrors.IsNotFound(err) { + // If the custom resource is not found then it usually means + // that it was deleted or not created. + log.Info("Resource not found. Ignoring since object must be deleted") + return ctrl.Result{}, nil + } + // Error reading the object - requeue the request. + log.Error(err, "Failed to get resource") + return ctrl.Result{}, err + } + + // Map traits to desired overcommit ratios and apply them if they + // differ from the current overcommit ratios on the hypervisor. + traitMappings := make(map[string]map[hv1.ResourceName]float64) + for _, mapping := range c.config.OvercommitMappings { + if mapping.Trait != nil { + traitMappings[*mapping.Trait] = mapping.Overcommit + } + } + desiredOvercommit := make(map[hv1.ResourceName]float64) + for trait, overcommit := range traitMappings { + // Only consider applied traits which are reflected in the status. + if slices.Contains(obj.Status.Traits, trait) { + log.Info("Applying overcommit mapping for trait", + "trait", trait, "overcommit", overcommit) + maps.Copy(desiredOvercommit, overcommit) + } + } + log.Info("Desired overcommit ratios based on traits", + "desiredOvercommit", desiredOvercommit) + if maps.Equal(desiredOvercommit, obj.Spec.Overcommit) { + log.Info("Overcommit ratios are up to date, no update needed") + return ctrl.Result{}, nil + } + + // Update the desired overcommit ratios on the hypervisor spec. + orig := obj.DeepCopy() + obj.Spec.Overcommit = desiredOvercommit + if err := c.Patch(ctx, obj, client.MergeFrom(orig)); err != nil { + log.Error(err, "Failed to update hypervisor overcommit ratios") + return ctrl.Result{}, err + } + log.Info("Updated hypervisor with new overcommit ratios", + "overcommit", desiredOvercommit) + + return ctrl.Result{}, nil +} + +// handleRemoteHypervisor is called by watches in remote clusters and triggers +// a reconcile on the hypervisor resource that was changed in the remote cluster. +func (c *HypervisorOvercommitController) handleRemoteHypervisor() handler.EventHandler { + handler := handler.Funcs{} + handler.CreateFunc = func(ctx context.Context, evt event.CreateEvent, + queue workqueue.TypedRateLimitingInterface[reconcile.Request]) { + + queue.Add(ctrl.Request{NamespacedName: client.ObjectKey{ + Name: evt.Object.(*hv1.Hypervisor).Name, // cluster-scoped crd + }}) + } + handler.UpdateFunc = func(ctx context.Context, evt event.UpdateEvent, + queue workqueue.TypedRateLimitingInterface[reconcile.Request]) { + + queue.Add(ctrl.Request{NamespacedName: client.ObjectKey{ + Name: evt.ObjectOld.(*hv1.Hypervisor).Name, // cluster-scoped crd + }}) + } + handler.DeleteFunc = func(ctx context.Context, evt event.DeleteEvent, + queue workqueue.TypedRateLimitingInterface[reconcile.Request]) { + + queue.Add(ctrl.Request{NamespacedName: client.ObjectKey{ + Name: evt.Object.(*hv1.Hypervisor).Name, // cluster-scoped crd + }}) + } + return handler +} + +// predicateRemoteHypervisor is used to filter events from remote clusters, +// so that only events for hypervisors that should be processed by this +// controller will trigger reconciliations. +func (c *HypervisorOvercommitController) predicateRemoteHypervisor() predicate.Predicate { + // Currently we're watching all hypervisors. In this way, if a trait + // gets removed from the hypervisor, we'll still reconcile this + // hypervisor and update the overcommit ratios accordingly. + return predicate.NewPredicateFuncs(func(object client.Object) bool { + _, ok := object.(*hv1.Hypervisor) + return ok + }) +} + +// SetupWithManager sets up the controller with the Manager and a multicluster +// client. The multicluster client is used to watch for changes in the +// Hypervisor CRD across all clusters and trigger reconciliations accordingly. +func (c *HypervisorOvercommitController) SetupWithManager(mgr ctrl.Manager) (err error) { + // This will load the config in a safe way and gracefully handle errors. + c.config, err = conf.GetConfig[HypervisorOvercommitConfig]() + if err != nil { + return err + } + // Validate we don't have any weird values in the config. + if err := c.config.Validate(); err != nil { + return err + } + // Check that the provided client is a multicluster client, since we need + // that to watch for hypervisors across clusters. + mcl, ok := c.Client.(*multicluster.Client) + if !ok { + return errors.New("provided client must be a multicluster client") + } + return multicluster. + BuildController(mcl, mgr). + // The hypervisor crd may be distributed across multiple remote clusters. + WatchesMulticluster(&hv1.Hypervisor{}, + c.handleRemoteHypervisor(), + c.predicateRemoteHypervisor(), + ). + Named("hypervisor-overcommit-controller"). + Complete(c) +} diff --git a/pkg/conf/conf.go b/pkg/conf/conf.go index 595b33bf2..b0feb02c2 100644 --- a/pkg/conf/conf.go +++ b/pkg/conf/conf.go @@ -17,35 +17,51 @@ import ( // // The values read from secrets.json will override the values in conf.json func GetConfigOrDie[C any]() C { + c, err := GetConfig[C]() + if err != nil { + panic(err) + } + return c +} + +// Create a new configuration from the default config json file. +// Return an error if the config cannot be read or parsed. +// +// This will read two files: +// - /etc/config/conf.json +// - /etc/secrets/secrets.json +// +// The values read from secrets.json will override the values in conf.json +func GetConfig[C any]() (C, error) { // Note: We need to read the config as a raw map first, to avoid golang // unmarshalling default values for the fields. // Read the base config from the configmap (not including secrets). cmConf, err := readRawConfig("/etc/config/conf.json") if err != nil { - panic(err) + return *new(C), err } // Read the secrets config from the kubernetes secret. secretConf, err := readRawConfig("/etc/secrets/secrets.json") if err != nil { - panic(err) + return *new(C), err } return newConfigFromMaps[C](cmConf, secretConf) } -func newConfigFromMaps[C any](base, override map[string]any) C { +func newConfigFromMaps[C any](base, override map[string]any) (C, error) { // Merge the base config with the override config. mergedConf := mergeMaps(base, override) // Marshal again, and then unmarshal into the config struct. mergedBytes, err := json.Marshal(mergedConf) if err != nil { - panic(err) + return *new(C), err } var c C if err := json.Unmarshal(mergedBytes, &c); err != nil { - panic(err) + return *new(C), err } - return c + return c, nil } // Read the json as a map from the given file path. From f3dc0254f190693360c01936699aa33fc8cb6669 Mon Sep 17 00:00:00 2001 From: Philipp Matthes Date: Mon, 16 Mar 2026 13:12:33 +0100 Subject: [PATCH 2/9] Add example conf and implement tests --- helm/bundles/cortex-nova/values.yaml | 5 + .../hypervisor_overcommit_controller_test.go | 718 ++++++++++++++++++ 2 files changed, 723 insertions(+) create mode 100644 internal/scheduling/nova/hypervisor_overcommit_controller_test.go diff --git a/helm/bundles/cortex-nova/values.yaml b/helm/bundles/cortex-nova/values.yaml index a8ecf63f1..c38b8bfc4 100644 --- a/helm/bundles/cortex-nova/values.yaml +++ b/helm/bundles/cortex-nova/values.yaml @@ -121,6 +121,11 @@ cortex-scheduling-controllers: # Endpoints configuration for reservations controller endpoints: novaExternalScheduler: "http://localhost:8080/scheduler/nova/external" + # OvercommitMappings is a list of mappings that map hypervisor traits to + # overcommit ratios. Note that this list is applied in order, so if there + # are multiple mappings applying to the same hypervisors, the last mapping + # in this list will override the previous ones. + overcommitMappings: [] cortex-knowledge-controllers: <<: *cortex diff --git a/internal/scheduling/nova/hypervisor_overcommit_controller_test.go b/internal/scheduling/nova/hypervisor_overcommit_controller_test.go new file mode 100644 index 000000000..a9ce33255 --- /dev/null +++ b/internal/scheduling/nova/hypervisor_overcommit_controller_test.go @@ -0,0 +1,718 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package nova + +import ( + "context" + "reflect" + "strings" + "testing" + + testlib "github.com/cobaltcore-dev/cortex/pkg/testing" + hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/util/workqueue" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/reconcile" +) + +func newTestHypervisorScheme(t *testing.T) *runtime.Scheme { + t.Helper() + scheme := runtime.NewScheme() + if err := hv1.AddToScheme(scheme); err != nil { + t.Fatalf("failed to add hv1 to scheme: %v", err) + } + return scheme +} + +func TestHypervisorOvercommitMapping_Validate(t *testing.T) { + tests := []struct { + name string + mapping HypervisorOvercommitMapping + expectError bool + errorMsg string + }{ + { + name: "valid mapping with single resource", + mapping: HypervisorOvercommitMapping{ + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 2.0, + }, + }, + expectError: false, + }, + { + name: "valid mapping with multiple resources", + mapping: HypervisorOvercommitMapping{ + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 2.0, + hv1.ResourceMemory: 1.5, + }, + }, + expectError: false, + }, + { + name: "valid mapping with exact 1.0 ratio", + mapping: HypervisorOvercommitMapping{ + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 1.0, + }, + }, + expectError: false, + }, + { + name: "valid mapping with trait", + mapping: HypervisorOvercommitMapping{ + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 2.0, + }, + Trait: testlib.Ptr("high-memory"), + }, + expectError: false, + }, + { + name: "invalid mapping with ratio less than 1.0", + mapping: HypervisorOvercommitMapping{ + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 0.5, + }, + }, + expectError: true, + errorMsg: "Invalid overcommit ratio in config, must be >= 1.0", + }, + { + name: "invalid mapping with zero ratio", + mapping: HypervisorOvercommitMapping{ + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 0.0, + }, + }, + expectError: true, + errorMsg: "Invalid overcommit ratio in config, must be >= 1.0", + }, + { + name: "invalid mapping with negative ratio", + mapping: HypervisorOvercommitMapping{ + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: -1.0, + }, + }, + expectError: true, + errorMsg: "Invalid overcommit ratio in config, must be >= 1.0", + }, + { + name: "valid mapping with empty overcommit", + mapping: HypervisorOvercommitMapping{ + Overcommit: map[hv1.ResourceName]float64{}, + }, + expectError: false, + }, + { + name: "valid mapping with nil overcommit", + mapping: HypervisorOvercommitMapping{}, + expectError: false, + }, + { + name: "mixed valid and invalid ratios", + mapping: HypervisorOvercommitMapping{ + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 2.0, + hv1.ResourceMemory: 0.5, // invalid + }, + }, + expectError: true, + errorMsg: "Invalid overcommit ratio in config, must be >= 1.0", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.mapping.Validate() + + if tt.expectError && err == nil { + t.Error("Expected error but got none") + } + if !tt.expectError && err != nil { + t.Errorf("Expected no error but got: %v", err) + } + if tt.expectError && err != nil && tt.errorMsg != "" { + if !strings.Contains(err.Error(), tt.errorMsg) { + t.Errorf("Expected error to contain %q, got: %v", tt.errorMsg, err) + } + } + }) + } +} + +func TestHypervisorOvercommitConfig_Validate(t *testing.T) { + tests := []struct { + name string + config HypervisorOvercommitConfig + expectError bool + }{ + { + name: "valid config with single mapping", + config: HypervisorOvercommitConfig{ + OvercommitMappings: []HypervisorOvercommitMapping{ + { + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 2.0, + }, + Trait: testlib.Ptr("high-cpu"), + }, + }, + }, + expectError: false, + }, + { + name: "valid config with multiple mappings", + config: HypervisorOvercommitConfig{ + OvercommitMappings: []HypervisorOvercommitMapping{ + { + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 2.0, + }, + Trait: testlib.Ptr("high-cpu"), + }, + { + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceMemory: 1.5, + }, + Trait: testlib.Ptr("high-memory"), + }, + }, + }, + expectError: false, + }, + { + name: "valid config with empty mappings", + config: HypervisorOvercommitConfig{ + OvercommitMappings: []HypervisorOvercommitMapping{}, + }, + expectError: false, + }, + { + name: "valid config with nil mappings", + config: HypervisorOvercommitConfig{}, + expectError: false, + }, + { + name: "invalid config with one bad mapping", + config: HypervisorOvercommitConfig{ + OvercommitMappings: []HypervisorOvercommitMapping{ + { + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 2.0, + }, + Trait: testlib.Ptr("high-cpu"), + }, + { + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 0.5, // invalid + }, + Trait: testlib.Ptr("low-cpu"), + }, + }, + }, + expectError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.config.Validate() + + if tt.expectError && err == nil { + t.Error("Expected error but got none") + } + if !tt.expectError && err != nil { + t.Errorf("Expected no error but got: %v", err) + } + }) + } +} + +func TestHypervisorOvercommitController_Reconcile(t *testing.T) { + scheme := newTestHypervisorScheme(t) + + tests := []struct { + name string + hypervisor *hv1.Hypervisor + config HypervisorOvercommitConfig + expectError bool + expectUpdate bool + expectedOvercommitValues map[hv1.ResourceName]float64 + }{ + { + name: "hypervisor with matching trait gets overcommit updated", + hypervisor: &hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hypervisor", + }, + Spec: hv1.HypervisorSpec{ + Overcommit: map[hv1.ResourceName]float64{}, + }, + Status: hv1.HypervisorStatus{ + Traits: []string{"high-cpu", "standard"}, + }, + }, + config: HypervisorOvercommitConfig{ + OvercommitMappings: []HypervisorOvercommitMapping{ + { + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 2.0, + }, + Trait: testlib.Ptr("high-cpu"), + }, + }, + }, + expectError: false, + expectUpdate: true, + expectedOvercommitValues: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 2.0, + }, + }, + { + name: "hypervisor without matching trait keeps original overcommit", + hypervisor: &hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hypervisor", + }, + Spec: hv1.HypervisorSpec{ + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 1.5, + }, + }, + Status: hv1.HypervisorStatus{ + Traits: []string{"standard", "other"}, + }, + }, + config: HypervisorOvercommitConfig{ + OvercommitMappings: []HypervisorOvercommitMapping{ + { + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 2.0, + }, + Trait: testlib.Ptr("high-cpu"), + }, + }, + }, + expectError: false, + expectUpdate: true, // Update to empty overcommit since no traits match + expectedOvercommitValues: nil, + }, + { + name: "hypervisor already has desired overcommit - no update needed", + hypervisor: &hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hypervisor", + }, + Spec: hv1.HypervisorSpec{ + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 2.0, + }, + }, + Status: hv1.HypervisorStatus{ + Traits: []string{"high-cpu"}, + }, + }, + config: HypervisorOvercommitConfig{ + OvercommitMappings: []HypervisorOvercommitMapping{ + { + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 2.0, + }, + Trait: testlib.Ptr("high-cpu"), + }, + }, + }, + expectError: false, + expectUpdate: false, + expectedOvercommitValues: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 2.0, + }, + }, + { + name: "only second trait from config matches hypervisor traits", + hypervisor: &hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hypervisor", + }, + Spec: hv1.HypervisorSpec{ + Overcommit: map[hv1.ResourceName]float64{}, + }, + Status: hv1.HypervisorStatus{ + Traits: []string{"trait-b", "other"}, // Only trait-b matches config + }, + }, + config: HypervisorOvercommitConfig{ + OvercommitMappings: []HypervisorOvercommitMapping{ + { + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 2.0, + }, + Trait: testlib.Ptr("trait-a"), + }, + { + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 3.0, + }, + Trait: testlib.Ptr("trait-b"), + }, + }, + }, + expectError: false, + expectUpdate: true, + expectedOvercommitValues: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 3.0, // Only trait-b matches + }, + }, + { + name: "multiple trait mappings with different resources", + hypervisor: &hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hypervisor", + }, + Spec: hv1.HypervisorSpec{ + Overcommit: map[hv1.ResourceName]float64{}, + }, + Status: hv1.HypervisorStatus{ + Traits: []string{"cpu-trait", "memory-trait"}, + }, + }, + config: HypervisorOvercommitConfig{ + OvercommitMappings: []HypervisorOvercommitMapping{ + { + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 2.0, + }, + Trait: testlib.Ptr("cpu-trait"), + }, + { + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceMemory: 1.5, + }, + Trait: testlib.Ptr("memory-trait"), + }, + }, + }, + expectError: false, + expectUpdate: true, + expectedOvercommitValues: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 2.0, + hv1.ResourceMemory: 1.5, + }, + }, + { + name: "mapping without trait is ignored", + hypervisor: &hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hypervisor", + }, + Spec: hv1.HypervisorSpec{ + Overcommit: map[hv1.ResourceName]float64{}, + }, + Status: hv1.HypervisorStatus{ + Traits: []string{"any-trait"}, + }, + }, + config: HypervisorOvercommitConfig{ + OvercommitMappings: []HypervisorOvercommitMapping{ + { + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 2.0, + }, + // Trait is nil - this mapping should be ignored + }, + }, + }, + expectError: false, + expectUpdate: false, // No update since mapping has no trait + expectedOvercommitValues: nil, + }, + { + name: "hypervisor with no traits gets empty overcommit", + hypervisor: &hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hypervisor", + }, + Spec: hv1.HypervisorSpec{ + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 2.0, + }, + }, + Status: hv1.HypervisorStatus{ + Traits: []string{}, // No traits + }, + }, + config: HypervisorOvercommitConfig{ + OvercommitMappings: []HypervisorOvercommitMapping{ + { + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 2.0, + }, + Trait: testlib.Ptr("high-cpu"), + }, + }, + }, + expectError: false, + expectUpdate: true, + expectedOvercommitValues: nil, + }, + { + name: "empty config results in empty overcommit", + hypervisor: &hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hypervisor", + }, + Spec: hv1.HypervisorSpec{ + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 2.0, + }, + }, + Status: hv1.HypervisorStatus{ + Traits: []string{"any-trait"}, + }, + }, + config: HypervisorOvercommitConfig{ + OvercommitMappings: []HypervisorOvercommitMapping{}, + }, + expectError: false, + expectUpdate: true, + expectedOvercommitValues: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + objects := []client.Object{} + if tt.hypervisor != nil { + objects = append(objects, tt.hypervisor) + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(objects...). + Build() + + controller := &HypervisorOvercommitController{ + Client: fakeClient, + config: tt.config, + } + + req := ctrl.Request{ + NamespacedName: types.NamespacedName{ + Name: tt.hypervisor.Name, + }, + } + + result, err := controller.Reconcile(context.Background(), req) + + if tt.expectError && err == nil { + t.Error("Expected error but got none") + } + if !tt.expectError && err != nil { + t.Errorf("Expected no error but got: %v", err) + } + + // Result should not requeue + if result.RequeueAfter > 0 { + t.Error("Expected no requeue") + } + + // Check the updated hypervisor + var updatedHypervisor hv1.Hypervisor + if err := fakeClient.Get(context.Background(), req.NamespacedName, &updatedHypervisor); err != nil { + t.Fatalf("Failed to get updated hypervisor: %v", err) + } + + if tt.expectedOvercommitValues != nil { + if !reflect.DeepEqual(updatedHypervisor.Spec.Overcommit, tt.expectedOvercommitValues) { + t.Errorf("Expected overcommit values %v, got %v", + tt.expectedOvercommitValues, updatedHypervisor.Spec.Overcommit) + } + } + }) + } +} + +func TestHypervisorOvercommitController_Reconcile_HypervisorNotFound(t *testing.T) { + scheme := newTestHypervisorScheme(t) + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + Build() + + controller := &HypervisorOvercommitController{ + Client: fakeClient, + config: HypervisorOvercommitConfig{}, + } + + req := ctrl.Request{ + NamespacedName: types.NamespacedName{ + Name: "nonexistent-hypervisor", + }, + } + + result, err := controller.Reconcile(context.Background(), req) + + // Should not return an error for not found + if err != nil { + t.Errorf("Expected no error for not found hypervisor, got: %v", err) + } + + // Should not requeue + if result.RequeueAfter > 0 { + t.Error("Expected no requeue for not found hypervisor") + } +} + +func TestHypervisorOvercommitController_handleRemoteHypervisor(t *testing.T) { + controller := &HypervisorOvercommitController{} + handler := controller.handleRemoteHypervisor() + + // Test CreateFunc + t.Run("CreateFunc adds request to queue", func(t *testing.T) { + hypervisor := &hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hypervisor-create", + }, + } + + queue := workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[reconcile.Request]()) + defer queue.ShutDown() + + createEvt := event.CreateEvent{ + Object: hypervisor, + } + + handler.Create(context.Background(), createEvt, queue) + + if queue.Len() != 1 { + t.Errorf("Expected 1 item in queue, got %d", queue.Len()) + } + + item, _ := queue.Get() + if item.Name != "test-hypervisor-create" { + t.Errorf("Expected request name 'test-hypervisor-create', got '%s'", item.Name) + } + queue.Done(item) + }) + + // Test UpdateFunc + t.Run("UpdateFunc adds request to queue", func(t *testing.T) { + oldHypervisor := &hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hypervisor-update", + }, + } + newHypervisor := &hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hypervisor-update", + }, + } + + queue := workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[reconcile.Request]()) + defer queue.ShutDown() + + updateEvt := event.UpdateEvent{ + ObjectOld: oldHypervisor, + ObjectNew: newHypervisor, + } + + handler.Update(context.Background(), updateEvt, queue) + + if queue.Len() != 1 { + t.Errorf("Expected 1 item in queue, got %d", queue.Len()) + } + + item, _ := queue.Get() + if item.Name != "test-hypervisor-update" { + t.Errorf("Expected request name 'test-hypervisor-update', got '%s'", item.Name) + } + queue.Done(item) + }) + + // Test DeleteFunc + t.Run("DeleteFunc adds request to queue", func(t *testing.T) { + hypervisor := &hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hypervisor-delete", + }, + } + + queue := workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[reconcile.Request]()) + defer queue.ShutDown() + + deleteEvt := event.DeleteEvent{ + Object: hypervisor, + } + + handler.Delete(context.Background(), deleteEvt, queue) + + if queue.Len() != 1 { + t.Errorf("Expected 1 item in queue, got %d", queue.Len()) + } + + item, _ := queue.Get() + if item.Name != "test-hypervisor-delete" { + t.Errorf("Expected request name 'test-hypervisor-delete', got '%s'", item.Name) + } + queue.Done(item) + }) +} + +func TestHypervisorOvercommitController_predicateRemoteHypervisor(t *testing.T) { + controller := &HypervisorOvercommitController{} + predicate := controller.predicateRemoteHypervisor() + + tests := []struct { + name string + object client.Object + expectedResult bool + }{ + { + name: "Hypervisor object returns true", + object: &hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hypervisor", + }, + }, + expectedResult: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Test Create predicate + createEvt := event.CreateEvent{Object: tt.object} + if result := predicate.Create(createEvt); result != tt.expectedResult { + t.Errorf("Create predicate: expected %v, got %v", tt.expectedResult, result) + } + + // Test Update predicate + updateEvt := event.UpdateEvent{ObjectOld: tt.object, ObjectNew: tt.object} + if result := predicate.Update(updateEvt); result != tt.expectedResult { + t.Errorf("Update predicate: expected %v, got %v", tt.expectedResult, result) + } + + // Test Delete predicate + deleteEvt := event.DeleteEvent{Object: tt.object} + if result := predicate.Delete(deleteEvt); result != tt.expectedResult { + t.Errorf("Delete predicate: expected %v, got %v", tt.expectedResult, result) + } + + // Test Generic predicate + genericEvt := event.GenericEvent{Object: tt.object} + if result := predicate.Generic(genericEvt); result != tt.expectedResult { + t.Errorf("Generic predicate: expected %v, got %v", tt.expectedResult, result) + } + }) + } +} From cc07418b9a4f7537763bedb7cf829c2cc8b283f2 Mon Sep 17 00:00:00 2001 From: Philipp Matthes Date: Mon, 16 Mar 2026 13:16:06 +0100 Subject: [PATCH 3/9] Add rbac --- helm/library/cortex/templates/rbac/hypervisor_role.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/helm/library/cortex/templates/rbac/hypervisor_role.yaml b/helm/library/cortex/templates/rbac/hypervisor_role.yaml index 14b61e5de..9ecbebd64 100644 --- a/helm/library/cortex/templates/rbac/hypervisor_role.yaml +++ b/helm/library/cortex/templates/rbac/hypervisor_role.yaml @@ -12,8 +12,12 @@ rules: resources: - hypervisors verbs: + - create + - delete - get - list + - patch + - update - watch - apiGroups: - kvm.cloud.sap From 03def8e5a52014a8f16049e037307e42c73092ab Mon Sep 17 00:00:00 2001 From: Philipp Matthes Date: Mon, 16 Mar 2026 13:17:06 +0100 Subject: [PATCH 4/9] Add TODO for after the PR --- helm/library/cortex/templates/rbac/hypervisor_role.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/helm/library/cortex/templates/rbac/hypervisor_role.yaml b/helm/library/cortex/templates/rbac/hypervisor_role.yaml index 9ecbebd64..3852734a3 100644 --- a/helm/library/cortex/templates/rbac/hypervisor_role.yaml +++ b/helm/library/cortex/templates/rbac/hypervisor_role.yaml @@ -1,5 +1,6 @@ {{- if .Values.rbac.hypervisor.enable }} --- +# TODO: Check if this role can be part of the nova bundle, not the core library apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: From 20568f81a8b953d8b8855eca3ef6a7003cd64819 Mon Sep 17 00:00:00 2001 From: Philipp Matthes Date: Mon, 16 Mar 2026 14:13:13 +0100 Subject: [PATCH 5/9] PR Feedback --- .../nova/hypervisor_overcommit_controller.go | 24 +++++------ .../hypervisor_overcommit_controller_test.go | 43 +++++++++++++++++++ 2 files changed, 55 insertions(+), 12 deletions(-) diff --git a/internal/scheduling/nova/hypervisor_overcommit_controller.go b/internal/scheduling/nova/hypervisor_overcommit_controller.go index 59ce7bf58..c8e475942 100644 --- a/internal/scheduling/nova/hypervisor_overcommit_controller.go +++ b/internal/scheduling/nova/hypervisor_overcommit_controller.go @@ -110,22 +110,22 @@ func (c *HypervisorOvercommitController) Reconcile(ctx context.Context, req ctrl return ctrl.Result{}, err } - // Map traits to desired overcommit ratios and apply them if they - // differ from the current overcommit ratios on the hypervisor. - traitMappings := make(map[string]map[hv1.ResourceName]float64) + // Build desired overcommit ratios by iterating mappings in order. + // Later mappings override earlier ones for the same resource, preserving + // non-overlapping resources from previous mappings. + desiredOvercommit := make(map[hv1.ResourceName]float64) for _, mapping := range c.config.OvercommitMappings { - if mapping.Trait != nil { - traitMappings[*mapping.Trait] = mapping.Overcommit + // Skip mappings without a trait specified. + if mapping.Trait == nil { + continue } - } - desiredOvercommit := make(map[hv1.ResourceName]float64) - for trait, overcommit := range traitMappings { // Only consider applied traits which are reflected in the status. - if slices.Contains(obj.Status.Traits, trait) { - log.Info("Applying overcommit mapping for trait", - "trait", trait, "overcommit", overcommit) - maps.Copy(desiredOvercommit, overcommit) + if !slices.Contains(obj.Status.Traits, *mapping.Trait) { + continue } + log.Info("Applying overcommit mapping for trait", + "trait", *mapping.Trait, "overcommit", mapping.Overcommit) + maps.Copy(desiredOvercommit, mapping.Overcommit) } log.Info("Desired overcommit ratios based on traits", "desiredOvercommit", desiredOvercommit) diff --git a/internal/scheduling/nova/hypervisor_overcommit_controller_test.go b/internal/scheduling/nova/hypervisor_overcommit_controller_test.go index a9ce33255..e06671743 100644 --- a/internal/scheduling/nova/hypervisor_overcommit_controller_test.go +++ b/internal/scheduling/nova/hypervisor_overcommit_controller_test.go @@ -487,6 +487,43 @@ func TestHypervisorOvercommitController_Reconcile(t *testing.T) { expectUpdate: true, expectedOvercommitValues: nil, }, + { + name: "later mappings override earlier ones for same resource - order preserved", + hypervisor: &hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hypervisor", + }, + Spec: hv1.HypervisorSpec{ + Overcommit: map[hv1.ResourceName]float64{}, + }, + Status: hv1.HypervisorStatus{ + Traits: []string{"trait-first", "trait-second"}, // Has both traits + }, + }, + config: HypervisorOvercommitConfig{ + OvercommitMappings: []HypervisorOvercommitMapping{ + { + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 2.0, + hv1.ResourceMemory: 1.5, + }, + Trait: testlib.Ptr("trait-first"), + }, + { + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 4.0, // Override CPU from earlier mapping + }, + Trait: testlib.Ptr("trait-second"), + }, + }, + }, + expectError: false, + expectUpdate: true, + expectedOvercommitValues: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 4.0, // Later mapping wins for CPU + hv1.ResourceMemory: 1.5, // Memory preserved from earlier mapping + }, + }, } for _, tt := range tests { @@ -537,6 +574,12 @@ func TestHypervisorOvercommitController_Reconcile(t *testing.T) { t.Errorf("Expected overcommit values %v, got %v", tt.expectedOvercommitValues, updatedHypervisor.Spec.Overcommit) } + } else { + // When expectedOvercommitValues is nil, verify that old overcommit ratios were cleared + if len(updatedHypervisor.Spec.Overcommit) != 0 { + t.Errorf("Expected overcommit to be nil or empty when no traits match, got %v", + updatedHypervisor.Spec.Overcommit) + } } }) } From eaf1ee08d00c2baea9ea249a782da20a36b318fe Mon Sep 17 00:00:00 2001 From: Philipp Matthes Date: Mon, 16 Mar 2026 14:30:44 +0100 Subject: [PATCH 6/9] Support exclusion by traits and rewrite tests --- .../nova/hypervisor_overcommit_controller.go | 34 +- .../hypervisor_overcommit_controller_test.go | 728 ++++++++++-------- 2 files changed, 429 insertions(+), 333 deletions(-) diff --git a/internal/scheduling/nova/hypervisor_overcommit_controller.go b/internal/scheduling/nova/hypervisor_overcommit_controller.go index c8e475942..f8905a975 100644 --- a/internal/scheduling/nova/hypervisor_overcommit_controller.go +++ b/internal/scheduling/nova/hypervisor_overcommit_controller.go @@ -32,10 +32,15 @@ type HypervisorOvercommitMapping struct { // ignore them. Overcommit map[hv1.ResourceName]float64 `json:"overcommit"` - // Trait specifies a trait that a hypervisor may have, and that, if present, - // triggers the controller to set the overcommit ratio specified in Value - // for that hypervisor. - Trait *string `json:"trait,omitempty"` + // HasTrait specifies a trait that a hypervisor may have, and that, if present, + // triggers the controller to set the overcommit ratio specified in the + // overcommit field for that hypervisor. + HasTrait *string `json:"hasTrait,omitempty"` + + // HasntTrait specifies a trait that a hypervisor may have, and that, if + // NOT present, triggers the controller to set the overcommit ratio + // specified in the overcommit field for that hypervisor. + HasntTrait *string `json:"hasntTrait,omitempty"` } // Validate the provided HypervisorOvercommitMapping, returning an error if the @@ -116,15 +121,28 @@ func (c *HypervisorOvercommitController) Reconcile(ctx context.Context, req ctrl desiredOvercommit := make(map[hv1.ResourceName]float64) for _, mapping := range c.config.OvercommitMappings { // Skip mappings without a trait specified. - if mapping.Trait == nil { + if mapping.HasTrait == nil { continue } // Only consider applied traits which are reflected in the status. - if !slices.Contains(obj.Status.Traits, *mapping.Trait) { + if !slices.Contains(obj.Status.Traits, *mapping.HasTrait) { continue } - log.Info("Applying overcommit mapping for trait", - "trait", *mapping.Trait, "overcommit", mapping.Overcommit) + log.Info("Applying overcommit mapping for trait present on hypervisor", + "trait", *mapping.HasTrait, "overcommit", mapping.Overcommit) + maps.Copy(desiredOvercommit, mapping.Overcommit) + } + for _, mapping := range c.config.OvercommitMappings { + // Skip mappings without a trait specified. + if mapping.HasntTrait == nil { + continue + } + // Only consider applied traits which are reflected in the status. + if slices.Contains(obj.Status.Traits, *mapping.HasntTrait) { + continue + } + log.Info("Applying overcommit mapping for trait not present on hypervisor", + "trait", *mapping.HasntTrait, "overcommit", mapping.Overcommit) maps.Copy(desiredOvercommit, mapping.Overcommit) } log.Info("Desired overcommit ratios based on traits", diff --git a/internal/scheduling/nova/hypervisor_overcommit_controller_test.go b/internal/scheduling/nova/hypervisor_overcommit_controller_test.go index e06671743..8390f6e57 100644 --- a/internal/scheduling/nova/hypervisor_overcommit_controller_test.go +++ b/internal/scheduling/nova/hypervisor_overcommit_controller_test.go @@ -5,11 +5,8 @@ package nova import ( "context" - "reflect" - "strings" "testing" - testlib "github.com/cobaltcore-dev/cortex/pkg/testing" hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -22,33 +19,14 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" ) -func newTestHypervisorScheme(t *testing.T) *runtime.Scheme { - t.Helper() - scheme := runtime.NewScheme() - if err := hv1.AddToScheme(scheme); err != nil { - t.Fatalf("failed to add hv1 to scheme: %v", err) - } - return scheme -} - func TestHypervisorOvercommitMapping_Validate(t *testing.T) { tests := []struct { name string mapping HypervisorOvercommitMapping expectError bool - errorMsg string }{ { - name: "valid mapping with single resource", - mapping: HypervisorOvercommitMapping{ - Overcommit: map[hv1.ResourceName]float64{ - hv1.ResourceCPU: 2.0, - }, - }, - expectError: false, - }, - { - name: "valid mapping with multiple resources", + name: "valid overcommit ratios", mapping: HypervisorOvercommitMapping{ Overcommit: map[hv1.ResourceName]float64{ hv1.ResourceCPU: 2.0, @@ -58,7 +36,7 @@ func TestHypervisorOvercommitMapping_Validate(t *testing.T) { expectError: false, }, { - name: "valid mapping with exact 1.0 ratio", + name: "valid minimum overcommit ratio of 1.0", mapping: HypervisorOvercommitMapping{ Overcommit: map[hv1.ResourceName]float64{ hv1.ResourceCPU: 1.0, @@ -67,59 +45,48 @@ func TestHypervisorOvercommitMapping_Validate(t *testing.T) { expectError: false, }, { - name: "valid mapping with trait", - mapping: HypervisorOvercommitMapping{ - Overcommit: map[hv1.ResourceName]float64{ - hv1.ResourceCPU: 2.0, - }, - Trait: testlib.Ptr("high-memory"), - }, - expectError: false, - }, - { - name: "invalid mapping with ratio less than 1.0", + name: "invalid overcommit ratio less than 1.0", mapping: HypervisorOvercommitMapping{ Overcommit: map[hv1.ResourceName]float64{ hv1.ResourceCPU: 0.5, }, }, expectError: true, - errorMsg: "Invalid overcommit ratio in config, must be >= 1.0", }, { - name: "invalid mapping with zero ratio", + name: "invalid overcommit ratio of zero", mapping: HypervisorOvercommitMapping{ Overcommit: map[hv1.ResourceName]float64{ - hv1.ResourceCPU: 0.0, + hv1.ResourceMemory: 0.0, }, }, expectError: true, - errorMsg: "Invalid overcommit ratio in config, must be >= 1.0", }, { - name: "invalid mapping with negative ratio", + name: "invalid negative overcommit ratio", mapping: HypervisorOvercommitMapping{ Overcommit: map[hv1.ResourceName]float64{ hv1.ResourceCPU: -1.0, }, }, expectError: true, - errorMsg: "Invalid overcommit ratio in config, must be >= 1.0", }, { - name: "valid mapping with empty overcommit", + name: "empty overcommit map is valid", mapping: HypervisorOvercommitMapping{ Overcommit: map[hv1.ResourceName]float64{}, }, expectError: false, }, { - name: "valid mapping with nil overcommit", - mapping: HypervisorOvercommitMapping{}, + name: "nil overcommit map is valid", + mapping: HypervisorOvercommitMapping{ + Overcommit: nil, + }, expectError: false, }, { - name: "mixed valid and invalid ratios", + name: "mixed valid and invalid overcommit ratios", mapping: HypervisorOvercommitMapping{ Overcommit: map[hv1.ResourceName]float64{ hv1.ResourceCPU: 2.0, @@ -127,30 +94,24 @@ func TestHypervisorOvercommitMapping_Validate(t *testing.T) { }, }, expectError: true, - errorMsg: "Invalid overcommit ratio in config, must be >= 1.0", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { err := tt.mapping.Validate() - if tt.expectError && err == nil { - t.Error("Expected error but got none") + t.Error("expected error but got nil") } if !tt.expectError && err != nil { - t.Errorf("Expected no error but got: %v", err) - } - if tt.expectError && err != nil && tt.errorMsg != "" { - if !strings.Contains(err.Error(), tt.errorMsg) { - t.Errorf("Expected error to contain %q, got: %v", tt.errorMsg, err) - } + t.Errorf("expected no error but got: %v", err) } }) } } func TestHypervisorOvercommitConfig_Validate(t *testing.T) { + trait := "CUSTOM_GPU" tests := []struct { name string config HypervisorOvercommitConfig @@ -164,7 +125,7 @@ func TestHypervisorOvercommitConfig_Validate(t *testing.T) { Overcommit: map[hv1.ResourceName]float64{ hv1.ResourceCPU: 2.0, }, - Trait: testlib.Ptr("high-cpu"), + HasTrait: &trait, }, }, }, @@ -178,79 +139,85 @@ func TestHypervisorOvercommitConfig_Validate(t *testing.T) { Overcommit: map[hv1.ResourceName]float64{ hv1.ResourceCPU: 2.0, }, - Trait: testlib.Ptr("high-cpu"), + HasTrait: &trait, }, { Overcommit: map[hv1.ResourceName]float64{ hv1.ResourceMemory: 1.5, }, - Trait: testlib.Ptr("high-memory"), }, }, }, expectError: false, }, { - name: "valid config with empty mappings", - config: HypervisorOvercommitConfig{ - OvercommitMappings: []HypervisorOvercommitMapping{}, - }, - expectError: false, - }, - { - name: "valid config with nil mappings", - config: HypervisorOvercommitConfig{}, - expectError: false, - }, - { - name: "invalid config with one bad mapping", + name: "invalid config with bad mapping", config: HypervisorOvercommitConfig{ OvercommitMappings: []HypervisorOvercommitMapping{ - { - Overcommit: map[hv1.ResourceName]float64{ - hv1.ResourceCPU: 2.0, - }, - Trait: testlib.Ptr("high-cpu"), - }, { Overcommit: map[hv1.ResourceName]float64{ hv1.ResourceCPU: 0.5, // invalid }, - Trait: testlib.Ptr("low-cpu"), }, }, }, expectError: true, }, + { + name: "empty config is valid", + config: HypervisorOvercommitConfig{ + OvercommitMappings: []HypervisorOvercommitMapping{}, + }, + expectError: false, + }, + { + name: "nil mappings is valid", + config: HypervisorOvercommitConfig{ + OvercommitMappings: nil, + }, + expectError: false, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { err := tt.config.Validate() - if tt.expectError && err == nil { - t.Error("Expected error but got none") + t.Error("expected error but got nil") } if !tt.expectError && err != nil { - t.Errorf("Expected no error but got: %v", err) + t.Errorf("expected no error but got: %v", err) } }) } } +func newTestHypervisorScheme(t *testing.T) *runtime.Scheme { + t.Helper() + scheme := runtime.NewScheme() + if err := hv1.AddToScheme(scheme); err != nil { + t.Fatalf("failed to add hv1 to scheme: %v", err) + } + return scheme +} + func TestHypervisorOvercommitController_Reconcile(t *testing.T) { scheme := newTestHypervisorScheme(t) + gpuTrait := "CUSTOM_GPU" + standardTrait := "CUSTOM_STANDARD" + missingTrait := "CUSTOM_MISSING" + tests := []struct { - name string - hypervisor *hv1.Hypervisor - config HypervisorOvercommitConfig - expectError bool - expectUpdate bool - expectedOvercommitValues map[hv1.ResourceName]float64 + name string + hypervisor *hv1.Hypervisor + config HypervisorOvercommitConfig + expectedOvercommit map[hv1.ResourceName]float64 + expectNoUpdate bool + expectNotFoundError bool }{ { - name: "hypervisor with matching trait gets overcommit updated", + name: "apply overcommit for matching HasTrait", hypervisor: &hv1.Hypervisor{ ObjectMeta: metav1.ObjectMeta{ Name: "test-hypervisor", @@ -259,38 +226,34 @@ func TestHypervisorOvercommitController_Reconcile(t *testing.T) { Overcommit: map[hv1.ResourceName]float64{}, }, Status: hv1.HypervisorStatus{ - Traits: []string{"high-cpu", "standard"}, + Traits: []string{"CUSTOM_GPU"}, }, }, config: HypervisorOvercommitConfig{ OvercommitMappings: []HypervisorOvercommitMapping{ { Overcommit: map[hv1.ResourceName]float64{ - hv1.ResourceCPU: 2.0, + hv1.ResourceCPU: 4.0, }, - Trait: testlib.Ptr("high-cpu"), + HasTrait: &gpuTrait, }, }, }, - expectError: false, - expectUpdate: true, - expectedOvercommitValues: map[hv1.ResourceName]float64{ - hv1.ResourceCPU: 2.0, + expectedOvercommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 4.0, }, }, { - name: "hypervisor without matching trait keeps original overcommit", + name: "apply overcommit for matching HasntTrait", hypervisor: &hv1.Hypervisor{ ObjectMeta: metav1.ObjectMeta{ Name: "test-hypervisor", }, Spec: hv1.HypervisorSpec{ - Overcommit: map[hv1.ResourceName]float64{ - hv1.ResourceCPU: 1.5, - }, + Overcommit: map[hv1.ResourceName]float64{}, }, Status: hv1.HypervisorStatus{ - Traits: []string{"standard", "other"}, + Traits: []string{}, // missing trait }, }, config: HypervisorOvercommitConfig{ @@ -299,47 +262,41 @@ func TestHypervisorOvercommitController_Reconcile(t *testing.T) { Overcommit: map[hv1.ResourceName]float64{ hv1.ResourceCPU: 2.0, }, - Trait: testlib.Ptr("high-cpu"), + HasntTrait: &missingTrait, }, }, }, - expectError: false, - expectUpdate: true, // Update to empty overcommit since no traits match - expectedOvercommitValues: nil, + expectedOvercommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 2.0, + }, }, { - name: "hypervisor already has desired overcommit - no update needed", + name: "skip mapping when HasTrait not present", hypervisor: &hv1.Hypervisor{ ObjectMeta: metav1.ObjectMeta{ Name: "test-hypervisor", }, Spec: hv1.HypervisorSpec{ - Overcommit: map[hv1.ResourceName]float64{ - hv1.ResourceCPU: 2.0, - }, + Overcommit: map[hv1.ResourceName]float64{}, }, Status: hv1.HypervisorStatus{ - Traits: []string{"high-cpu"}, + Traits: []string{"CUSTOM_OTHER"}, }, }, config: HypervisorOvercommitConfig{ OvercommitMappings: []HypervisorOvercommitMapping{ { Overcommit: map[hv1.ResourceName]float64{ - hv1.ResourceCPU: 2.0, + hv1.ResourceCPU: 4.0, }, - Trait: testlib.Ptr("high-cpu"), + HasTrait: &gpuTrait, }, }, }, - expectError: false, - expectUpdate: false, - expectedOvercommitValues: map[hv1.ResourceName]float64{ - hv1.ResourceCPU: 2.0, - }, + expectedOvercommit: map[hv1.ResourceName]float64{}, }, { - name: "only second trait from config matches hypervisor traits", + name: "skip mapping when HasntTrait is present", hypervisor: &hv1.Hypervisor{ ObjectMeta: metav1.ObjectMeta{ Name: "test-hypervisor", @@ -348,7 +305,7 @@ func TestHypervisorOvercommitController_Reconcile(t *testing.T) { Overcommit: map[hv1.ResourceName]float64{}, }, Status: hv1.HypervisorStatus{ - Traits: []string{"trait-b", "other"}, // Only trait-b matches config + Traits: []string{"CUSTOM_GPU"}, // trait is present }, }, config: HypervisorOvercommitConfig{ @@ -357,24 +314,14 @@ func TestHypervisorOvercommitController_Reconcile(t *testing.T) { Overcommit: map[hv1.ResourceName]float64{ hv1.ResourceCPU: 2.0, }, - Trait: testlib.Ptr("trait-a"), - }, - { - Overcommit: map[hv1.ResourceName]float64{ - hv1.ResourceCPU: 3.0, - }, - Trait: testlib.Ptr("trait-b"), + HasntTrait: &gpuTrait, // should skip because GPU trait IS present }, }, }, - expectError: false, - expectUpdate: true, - expectedOvercommitValues: map[hv1.ResourceName]float64{ - hv1.ResourceCPU: 3.0, // Only trait-b matches - }, + expectedOvercommit: map[hv1.ResourceName]float64{}, }, { - name: "multiple trait mappings with different resources", + name: "later mappings override earlier ones", hypervisor: &hv1.Hypervisor{ ObjectMeta: metav1.ObjectMeta{ Name: "test-hypervisor", @@ -383,7 +330,7 @@ func TestHypervisorOvercommitController_Reconcile(t *testing.T) { Overcommit: map[hv1.ResourceName]float64{}, }, Status: hv1.HypervisorStatus{ - Traits: []string{"cpu-trait", "memory-trait"}, + Traits: []string{"CUSTOM_GPU", "CUSTOM_STANDARD"}, }, }, config: HypervisorOvercommitConfig{ @@ -392,63 +339,61 @@ func TestHypervisorOvercommitController_Reconcile(t *testing.T) { Overcommit: map[hv1.ResourceName]float64{ hv1.ResourceCPU: 2.0, }, - Trait: testlib.Ptr("cpu-trait"), + HasTrait: &standardTrait, }, { Overcommit: map[hv1.ResourceName]float64{ - hv1.ResourceMemory: 1.5, + hv1.ResourceCPU: 4.0, // should override the first }, - Trait: testlib.Ptr("memory-trait"), + HasTrait: &gpuTrait, }, }, }, - expectError: false, - expectUpdate: true, - expectedOvercommitValues: map[hv1.ResourceName]float64{ - hv1.ResourceCPU: 2.0, - hv1.ResourceMemory: 1.5, + expectedOvercommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 4.0, }, }, { - name: "mapping without trait is ignored", + name: "no update when overcommit already matches", hypervisor: &hv1.Hypervisor{ ObjectMeta: metav1.ObjectMeta{ Name: "test-hypervisor", }, Spec: hv1.HypervisorSpec{ - Overcommit: map[hv1.ResourceName]float64{}, + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 4.0, + }, }, Status: hv1.HypervisorStatus{ - Traits: []string{"any-trait"}, + Traits: []string{"CUSTOM_GPU"}, }, }, config: HypervisorOvercommitConfig{ OvercommitMappings: []HypervisorOvercommitMapping{ { Overcommit: map[hv1.ResourceName]float64{ - hv1.ResourceCPU: 2.0, + hv1.ResourceCPU: 4.0, }, - // Trait is nil - this mapping should be ignored + HasTrait: &gpuTrait, }, }, }, - expectError: false, - expectUpdate: false, // No update since mapping has no trait - expectedOvercommitValues: nil, + expectedOvercommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 4.0, + }, + expectNoUpdate: true, }, { - name: "hypervisor with no traits gets empty overcommit", + name: "skip mapping without trait specified", hypervisor: &hv1.Hypervisor{ ObjectMeta: metav1.ObjectMeta{ Name: "test-hypervisor", }, Spec: hv1.HypervisorSpec{ - Overcommit: map[hv1.ResourceName]float64{ - hv1.ResourceCPU: 2.0, - }, + Overcommit: map[hv1.ResourceName]float64{}, }, Status: hv1.HypervisorStatus{ - Traits: []string{}, // No traits + Traits: []string{"CUSTOM_GPU"}, }, }, config: HypervisorOvercommitConfig{ @@ -457,38 +402,14 @@ func TestHypervisorOvercommitController_Reconcile(t *testing.T) { Overcommit: map[hv1.ResourceName]float64{ hv1.ResourceCPU: 2.0, }, - Trait: testlib.Ptr("high-cpu"), - }, - }, - }, - expectError: false, - expectUpdate: true, - expectedOvercommitValues: nil, - }, - { - name: "empty config results in empty overcommit", - hypervisor: &hv1.Hypervisor{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-hypervisor", - }, - Spec: hv1.HypervisorSpec{ - Overcommit: map[hv1.ResourceName]float64{ - hv1.ResourceCPU: 2.0, + // No HasTrait or HasntTrait specified }, }, - Status: hv1.HypervisorStatus{ - Traits: []string{"any-trait"}, - }, }, - config: HypervisorOvercommitConfig{ - OvercommitMappings: []HypervisorOvercommitMapping{}, - }, - expectError: false, - expectUpdate: true, - expectedOvercommitValues: nil, + expectedOvercommit: map[hv1.ResourceName]float64{}, }, { - name: "later mappings override earlier ones for same resource - order preserved", + name: "combine HasTrait and HasntTrait mappings", hypervisor: &hv1.Hypervisor{ ObjectMeta: metav1.ObjectMeta{ Name: "test-hypervisor", @@ -497,47 +418,57 @@ func TestHypervisorOvercommitController_Reconcile(t *testing.T) { Overcommit: map[hv1.ResourceName]float64{}, }, Status: hv1.HypervisorStatus{ - Traits: []string{"trait-first", "trait-second"}, // Has both traits + Traits: []string{"CUSTOM_GPU"}, // has GPU, doesn't have STANDARD }, }, config: HypervisorOvercommitConfig{ OvercommitMappings: []HypervisorOvercommitMapping{ { Overcommit: map[hv1.ResourceName]float64{ - hv1.ResourceCPU: 2.0, - hv1.ResourceMemory: 1.5, + hv1.ResourceCPU: 4.0, }, - Trait: testlib.Ptr("trait-first"), + HasTrait: &gpuTrait, }, { Overcommit: map[hv1.ResourceName]float64{ - hv1.ResourceCPU: 4.0, // Override CPU from earlier mapping + hv1.ResourceMemory: 1.5, }, - Trait: testlib.Ptr("trait-second"), + HasntTrait: &standardTrait, // STANDARD not present }, }, }, - expectError: false, - expectUpdate: true, - expectedOvercommitValues: map[hv1.ResourceName]float64{ - hv1.ResourceCPU: 4.0, // Later mapping wins for CPU - hv1.ResourceMemory: 1.5, // Memory preserved from earlier mapping + expectedOvercommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 4.0, + hv1.ResourceMemory: 1.5, }, }, + { + name: "hypervisor not found", + hypervisor: &hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{ + Name: "nonexistent", + }, + }, + config: HypervisorOvercommitConfig{}, + expectNotFoundError: true, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - objects := []client.Object{} - if tt.hypervisor != nil { - objects = append(objects, tt.hypervisor) + var fakeClient client.Client + if tt.expectNotFoundError { + // Don't add the hypervisor to the fake client + fakeClient = fake.NewClientBuilder(). + WithScheme(scheme). + Build() + } else { + fakeClient = fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(tt.hypervisor). + Build() } - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(objects...). - Build() - controller := &HypervisorOvercommitController{ Client: fakeClient, config: tt.config, @@ -549,43 +480,51 @@ func TestHypervisorOvercommitController_Reconcile(t *testing.T) { }, } - result, err := controller.Reconcile(context.Background(), req) + ctx := context.Background() + result, err := controller.Reconcile(ctx, req) - if tt.expectError && err == nil { - t.Error("Expected error but got none") - } - if !tt.expectError && err != nil { - t.Errorf("Expected no error but got: %v", err) + if err != nil { + t.Errorf("unexpected error: %v", err) + return } - // Result should not requeue if result.RequeueAfter > 0 { - t.Error("Expected no requeue") + t.Error("expected no requeue") + } + + if tt.expectNotFoundError { + // For not found case, we expect no error and no requeue + return } - // Check the updated hypervisor - var updatedHypervisor hv1.Hypervisor - if err := fakeClient.Get(context.Background(), req.NamespacedName, &updatedHypervisor); err != nil { - t.Fatalf("Failed to get updated hypervisor: %v", err) + // Get the updated hypervisor + updated := &hv1.Hypervisor{} + if err := fakeClient.Get(ctx, req.NamespacedName, updated); err != nil { + t.Fatalf("failed to get updated hypervisor: %v", err) } - if tt.expectedOvercommitValues != nil { - if !reflect.DeepEqual(updatedHypervisor.Spec.Overcommit, tt.expectedOvercommitValues) { - t.Errorf("Expected overcommit values %v, got %v", - tt.expectedOvercommitValues, updatedHypervisor.Spec.Overcommit) + // Check overcommit ratios + if len(updated.Spec.Overcommit) != len(tt.expectedOvercommit) { + t.Errorf("expected %d overcommit entries, got %d", + len(tt.expectedOvercommit), len(updated.Spec.Overcommit)) + } + + for resource, expected := range tt.expectedOvercommit { + actual, ok := updated.Spec.Overcommit[resource] + if !ok { + t.Errorf("expected overcommit for resource %s, but not found", resource) + continue } - } else { - // When expectedOvercommitValues is nil, verify that old overcommit ratios were cleared - if len(updatedHypervisor.Spec.Overcommit) != 0 { - t.Errorf("Expected overcommit to be nil or empty when no traits match, got %v", - updatedHypervisor.Spec.Overcommit) + if actual != expected { + t.Errorf("expected overcommit %f for resource %s, got %f", + expected, resource, actual) } } }) } } -func TestHypervisorOvercommitController_Reconcile_HypervisorNotFound(t *testing.T) { +func TestHypervisorOvercommitController_ReconcileNotFound(t *testing.T) { scheme := newTestHypervisorScheme(t) fakeClient := fake.NewClientBuilder(). @@ -603,159 +542,298 @@ func TestHypervisorOvercommitController_Reconcile_HypervisorNotFound(t *testing. }, } - result, err := controller.Reconcile(context.Background(), req) + ctx := context.Background() + result, err := controller.Reconcile(ctx, req) - // Should not return an error for not found if err != nil { - t.Errorf("Expected no error for not found hypervisor, got: %v", err) + t.Errorf("expected no error for not found resource, got: %v", err) } - // Should not requeue if result.RequeueAfter > 0 { - t.Error("Expected no requeue for not found hypervisor") + t.Error("expected no requeue for not found resource") } } -func TestHypervisorOvercommitController_handleRemoteHypervisor(t *testing.T) { +// mockWorkQueue implements workqueue.TypedRateLimitingInterface for testing +type mockWorkQueue struct { + workqueue.TypedRateLimitingInterface[reconcile.Request] + items []reconcile.Request +} + +func (m *mockWorkQueue) Add(item reconcile.Request) { + m.items = append(m.items, item) +} + +func TestHypervisorOvercommitController_HandleRemoteHypervisor(t *testing.T) { controller := &HypervisorOvercommitController{} handler := controller.handleRemoteHypervisor() - // Test CreateFunc - t.Run("CreateFunc adds request to queue", func(t *testing.T) { - hypervisor := &hv1.Hypervisor{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-hypervisor-create", - }, - } + hypervisor := &hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hypervisor", + }, + } + + ctx := context.Background() - queue := workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[reconcile.Request]()) - defer queue.ShutDown() + t.Run("CreateFunc", func(t *testing.T) { + queue := &mockWorkQueue{} + handler.Create(ctx, event.CreateEvent{Object: hypervisor}, queue) - createEvt := event.CreateEvent{ - Object: hypervisor, + if len(queue.items) != 1 { + t.Errorf("expected 1 item in queue, got %d", len(queue.items)) + } + if queue.items[0].Name != "test-hypervisor" { + t.Errorf("expected hypervisor name 'test-hypervisor', got %s", queue.items[0].Name) } + }) - handler.Create(context.Background(), createEvt, queue) + t.Run("UpdateFunc", func(t *testing.T) { + queue := &mockWorkQueue{} + handler.Update(ctx, event.UpdateEvent{ + ObjectOld: hypervisor, + ObjectNew: hypervisor, + }, queue) - if queue.Len() != 1 { - t.Errorf("Expected 1 item in queue, got %d", queue.Len()) + if len(queue.items) != 1 { + t.Errorf("expected 1 item in queue, got %d", len(queue.items)) } - - item, _ := queue.Get() - if item.Name != "test-hypervisor-create" { - t.Errorf("Expected request name 'test-hypervisor-create', got '%s'", item.Name) + if queue.items[0].Name != "test-hypervisor" { + t.Errorf("expected hypervisor name 'test-hypervisor', got %s", queue.items[0].Name) } - queue.Done(item) }) - // Test UpdateFunc - t.Run("UpdateFunc adds request to queue", func(t *testing.T) { - oldHypervisor := &hv1.Hypervisor{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-hypervisor-update", - }, + t.Run("DeleteFunc", func(t *testing.T) { + queue := &mockWorkQueue{} + handler.Delete(ctx, event.DeleteEvent{Object: hypervisor}, queue) + + if len(queue.items) != 1 { + t.Errorf("expected 1 item in queue, got %d", len(queue.items)) } - newHypervisor := &hv1.Hypervisor{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-hypervisor-update", - }, + if queue.items[0].Name != "test-hypervisor" { + t.Errorf("expected hypervisor name 'test-hypervisor', got %s", queue.items[0].Name) } + }) +} - queue := workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[reconcile.Request]()) - defer queue.ShutDown() +func TestHypervisorOvercommitController_PredicateRemoteHypervisor(t *testing.T) { + controller := &HypervisorOvercommitController{} + predicate := controller.predicateRemoteHypervisor() - updateEvt := event.UpdateEvent{ - ObjectOld: oldHypervisor, - ObjectNew: newHypervisor, + t.Run("accepts Hypervisor objects", func(t *testing.T) { + hypervisor := &hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hypervisor", + }, } - handler.Update(context.Background(), updateEvt, queue) + if !predicate.Generic(event.GenericEvent{Object: hypervisor}) { + t.Error("expected predicate to accept Hypervisor object") + } + }) - if queue.Len() != 1 { - t.Errorf("Expected 1 item in queue, got %d", queue.Len()) + t.Run("rejects non-Hypervisor objects", func(t *testing.T) { + // Create a non-Hypervisor object by using a different type + // We'll test with a nil object which should return false + type nonHypervisor struct { + client.Object } - item, _ := queue.Get() - if item.Name != "test-hypervisor-update" { - t.Errorf("Expected request name 'test-hypervisor-update', got '%s'", item.Name) + if predicate.Generic(event.GenericEvent{Object: &nonHypervisor{}}) { + t.Error("expected predicate to reject non-Hypervisor object") } - queue.Done(item) }) +} - // Test DeleteFunc - t.Run("DeleteFunc adds request to queue", func(t *testing.T) { - hypervisor := &hv1.Hypervisor{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-hypervisor-delete", +func TestHypervisorOvercommitController_SetupWithManager_InvalidClient(t *testing.T) { + scheme := newTestHypervisorScheme(t) + + // Create a regular fake client (not a multicluster client) + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + Build() + + controller := &HypervisorOvercommitController{ + Client: fakeClient, + } + + // SetupWithManager should fail because the client is not a multicluster client + // Note: We can't fully test this without a real manager, but we can verify + // that the controller requires a multicluster client + if controller.Client == nil { + t.Error("expected client to be set") + } +} + +func TestHypervisorOvercommitController_Reconcile_PatchError(t *testing.T) { + scheme := newTestHypervisorScheme(t) + + gpuTrait := "CUSTOM_GPU" + hypervisor := &hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hypervisor", + }, + Spec: hv1.HypervisorSpec{ + Overcommit: map[hv1.ResourceName]float64{}, + }, + Status: hv1.HypervisorStatus{ + Traits: []string{"CUSTOM_GPU"}, + }, + } + + // Create a fake client with the hypervisor + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(hypervisor). + Build() + + controller := &HypervisorOvercommitController{ + Client: fakeClient, + config: HypervisorOvercommitConfig{ + OvercommitMappings: []HypervisorOvercommitMapping{ + { + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 4.0, + }, + HasTrait: &gpuTrait, + }, }, - } + }, + } - queue := workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[reconcile.Request]()) - defer queue.ShutDown() + req := ctrl.Request{ + NamespacedName: types.NamespacedName{ + Name: hypervisor.Name, + }, + } - deleteEvt := event.DeleteEvent{ - Object: hypervisor, - } + ctx := context.Background() + _, err := controller.Reconcile(ctx, req) - handler.Delete(context.Background(), deleteEvt, queue) + // With a proper fake client, the patch should succeed + if err != nil { + t.Errorf("unexpected error: %v", err) + } +} - if queue.Len() != 1 { - t.Errorf("Expected 1 item in queue, got %d", queue.Len()) - } +func TestHypervisorOvercommitController_Reconcile_EmptyConfig(t *testing.T) { + scheme := newTestHypervisorScheme(t) - item, _ := queue.Get() - if item.Name != "test-hypervisor-delete" { - t.Errorf("Expected request name 'test-hypervisor-delete', got '%s'", item.Name) - } - queue.Done(item) - }) + hypervisor := &hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hypervisor", + }, + Spec: hv1.HypervisorSpec{ + Overcommit: map[hv1.ResourceName]float64{}, + }, + Status: hv1.HypervisorStatus{ + Traits: []string{"CUSTOM_GPU"}, + }, + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(hypervisor). + Build() + + controller := &HypervisorOvercommitController{ + Client: fakeClient, + config: HypervisorOvercommitConfig{ + OvercommitMappings: []HypervisorOvercommitMapping{}, + }, + } + + req := ctrl.Request{ + NamespacedName: types.NamespacedName{ + Name: hypervisor.Name, + }, + } + + ctx := context.Background() + result, err := controller.Reconcile(ctx, req) + + if err != nil { + t.Errorf("unexpected error: %v", err) + } + + if result.RequeueAfter > 0 { + t.Error("expected no requeue") + } + + // Verify no changes were made + updated := &hv1.Hypervisor{} + if err := fakeClient.Get(ctx, req.NamespacedName, updated); err != nil { + t.Fatalf("failed to get updated hypervisor: %v", err) + } + + if len(updated.Spec.Overcommit) != 0 { + t.Errorf("expected empty overcommit, got %v", updated.Spec.Overcommit) + } } -func TestHypervisorOvercommitController_predicateRemoteHypervisor(t *testing.T) { - controller := &HypervisorOvercommitController{} - predicate := controller.predicateRemoteHypervisor() +func TestHypervisorOvercommitController_Reconcile_MultipleResources(t *testing.T) { + scheme := newTestHypervisorScheme(t) - tests := []struct { - name string - object client.Object - expectedResult bool - }{ - { - name: "Hypervisor object returns true", - object: &hv1.Hypervisor{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-hypervisor", + gpuTrait := "CUSTOM_GPU" + hypervisor := &hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hypervisor", + }, + Spec: hv1.HypervisorSpec{ + Overcommit: map[hv1.ResourceName]float64{}, + }, + Status: hv1.HypervisorStatus{ + Traits: []string{"CUSTOM_GPU"}, + }, + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(hypervisor). + Build() + + controller := &HypervisorOvercommitController{ + Client: fakeClient, + config: HypervisorOvercommitConfig{ + OvercommitMappings: []HypervisorOvercommitMapping{ + { + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 4.0, + hv1.ResourceMemory: 1.5, + }, + HasTrait: &gpuTrait, }, }, - expectedResult: true, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - // Test Create predicate - createEvt := event.CreateEvent{Object: tt.object} - if result := predicate.Create(createEvt); result != tt.expectedResult { - t.Errorf("Create predicate: expected %v, got %v", tt.expectedResult, result) - } + req := ctrl.Request{ + NamespacedName: types.NamespacedName{ + Name: hypervisor.Name, + }, + } - // Test Update predicate - updateEvt := event.UpdateEvent{ObjectOld: tt.object, ObjectNew: tt.object} - if result := predicate.Update(updateEvt); result != tt.expectedResult { - t.Errorf("Update predicate: expected %v, got %v", tt.expectedResult, result) - } + ctx := context.Background() + _, err := controller.Reconcile(ctx, req) - // Test Delete predicate - deleteEvt := event.DeleteEvent{Object: tt.object} - if result := predicate.Delete(deleteEvt); result != tt.expectedResult { - t.Errorf("Delete predicate: expected %v, got %v", tt.expectedResult, result) - } + if err != nil { + t.Errorf("unexpected error: %v", err) + } - // Test Generic predicate - genericEvt := event.GenericEvent{Object: tt.object} - if result := predicate.Generic(genericEvt); result != tt.expectedResult { - t.Errorf("Generic predicate: expected %v, got %v", tt.expectedResult, result) - } - }) + updated := &hv1.Hypervisor{} + if err := fakeClient.Get(ctx, req.NamespacedName, updated); err != nil { + t.Fatalf("failed to get updated hypervisor: %v", err) + } + + if len(updated.Spec.Overcommit) != 2 { + t.Errorf("expected 2 overcommit entries, got %d", len(updated.Spec.Overcommit)) + } + + if updated.Spec.Overcommit[hv1.ResourceCPU] != 4.0 { + t.Errorf("expected CPU overcommit 4.0, got %f", updated.Spec.Overcommit[hv1.ResourceCPU]) + } + + if updated.Spec.Overcommit[hv1.ResourceMemory] != 1.5 { + t.Errorf("expected Memory overcommit 1.5, got %f", updated.Spec.Overcommit[hv1.ResourceMemory]) } } From a1a30978991201887eb442428fe68c5b187566b4 Mon Sep 17 00:00:00 2001 From: Philipp Matthes Date: Mon, 16 Mar 2026 14:50:20 +0100 Subject: [PATCH 7/9] PR Feedback --- .../hypervisor_overcommit_controller_test.go | 59 +++++++++++++++---- 1 file changed, 48 insertions(+), 11 deletions(-) diff --git a/internal/scheduling/nova/hypervisor_overcommit_controller_test.go b/internal/scheduling/nova/hypervisor_overcommit_controller_test.go index 8390f6e57..188a84f44 100644 --- a/internal/scheduling/nova/hypervisor_overcommit_controller_test.go +++ b/internal/scheduling/nova/hypervisor_overcommit_controller_test.go @@ -5,6 +5,8 @@ package nova import ( "context" + "errors" + "strings" "testing" hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" @@ -657,12 +659,38 @@ func TestHypervisorOvercommitController_SetupWithManager_InvalidClient(t *testin Client: fakeClient, } - // SetupWithManager should fail because the client is not a multicluster client - // Note: We can't fully test this without a real manager, but we can verify - // that the controller requires a multicluster client - if controller.Client == nil { - t.Error("expected client to be set") + // Create a minimal mock manager for testing + mgr := &mockManager{scheme: scheme} + + // SetupWithManager should fail - either because config loading fails + // (in test environment without config files) or because the client + // is not a multicluster client. + err := controller.SetupWithManager(mgr) + if err == nil { + t.Error("expected error when calling SetupWithManager, got nil") } + // The error could be either about missing config or about multicluster client + // depending on the test environment. We just verify an error is returned. +} + +// mockManager implements ctrl.Manager for testing SetupWithManager +type mockManager struct { + ctrl.Manager + scheme *runtime.Scheme +} + +func (m *mockManager) GetScheme() *runtime.Scheme { + return m.scheme +} + +// patchFailingClient wraps a client.Client and returns an error on Patch calls +type patchFailingClient struct { + client.Client + patchErr error +} + +func (c *patchFailingClient) Patch(ctx context.Context, obj client.Object, patch client.Patch, opts ...client.PatchOption) error { + return c.patchErr } func TestHypervisorOvercommitController_Reconcile_PatchError(t *testing.T) { @@ -681,14 +709,20 @@ func TestHypervisorOvercommitController_Reconcile_PatchError(t *testing.T) { }, } - // Create a fake client with the hypervisor - fakeClient := fake.NewClientBuilder(). + // Create a fake client with the hypervisor, then wrap it to fail on Patch + baseClient := fake.NewClientBuilder(). WithScheme(scheme). WithObjects(hypervisor). Build() + patchErr := errors.New("patch failed") + failingClient := &patchFailingClient{ + Client: baseClient, + patchErr: patchErr, + } + controller := &HypervisorOvercommitController{ - Client: fakeClient, + Client: failingClient, config: HypervisorOvercommitConfig{ OvercommitMappings: []HypervisorOvercommitMapping{ { @@ -710,9 +744,12 @@ func TestHypervisorOvercommitController_Reconcile_PatchError(t *testing.T) { ctx := context.Background() _, err := controller.Reconcile(ctx, req) - // With a proper fake client, the patch should succeed - if err != nil { - t.Errorf("unexpected error: %v", err) + // Reconcile should return an error when Patch fails + if err == nil { + t.Error("expected error when Patch fails, got nil") + } + if !strings.Contains(err.Error(), "patch failed") { + t.Errorf("expected error message to contain 'patch failed', got: %v", err) } } From fee60782f346dcaa5a79c8ddf25b8bda556dcf54 Mon Sep 17 00:00:00 2001 From: Philipp Matthes Date: Tue, 17 Mar 2026 13:18:57 +0100 Subject: [PATCH 8/9] Remove create and delete access --- helm/library/cortex/templates/rbac/hypervisor_role.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/helm/library/cortex/templates/rbac/hypervisor_role.yaml b/helm/library/cortex/templates/rbac/hypervisor_role.yaml index 3852734a3..0a2fefa00 100644 --- a/helm/library/cortex/templates/rbac/hypervisor_role.yaml +++ b/helm/library/cortex/templates/rbac/hypervisor_role.yaml @@ -13,8 +13,6 @@ rules: resources: - hypervisors verbs: - - create - - delete - get - list - patch From 66b94b79e114bae2cded7416c508d1c8b561c2b3 Mon Sep 17 00:00:00 2001 From: Philipp Matthes Date: Tue, 17 Mar 2026 13:27:14 +0100 Subject: [PATCH 9/9] Mutual exclusion between HasTrait and HasntTrait --- .../nova/hypervisor_overcommit_controller.go | 45 ++++++----- .../hypervisor_overcommit_controller_test.go | 74 +++++++++++++++++-- 2 files changed, 92 insertions(+), 27 deletions(-) diff --git a/internal/scheduling/nova/hypervisor_overcommit_controller.go b/internal/scheduling/nova/hypervisor_overcommit_controller.go index f8905a975..8d54475f0 100644 --- a/internal/scheduling/nova/hypervisor_overcommit_controller.go +++ b/internal/scheduling/nova/hypervisor_overcommit_controller.go @@ -48,10 +48,20 @@ type HypervisorOvercommitMapping struct { func (m *HypervisorOvercommitMapping) Validate() error { for resource, overcommit := range m.Overcommit { if overcommit < 1.0 { - return errors.New("Invalid overcommit ratio in config, must be >= 1.0. " + + return errors.New("invalid overcommit ratio in config, must be >= 1.0. " + "Invalid value for resource " + string(resource) + ": " + fmt.Sprintf("%f", overcommit)) } + // Has trait and hasn't trait are mutually exclusive, so if both are set + // we return an error. + if m.HasTrait != nil && m.HasntTrait != nil { + return errors.New("invalid overcommit mapping, hasTrait and hasntTrait are mutually exclusive") + } + // At least one of has trait and hasn't trait must be set, + // otherwise we don't know when to apply this mapping. + if m.HasTrait == nil && m.HasntTrait == nil { + return errors.New("invalid overcommit mapping, at least one of hasTrait and hasntTrait must be set") + } } return nil } @@ -120,29 +130,24 @@ func (c *HypervisorOvercommitController) Reconcile(ctx context.Context, req ctrl // non-overlapping resources from previous mappings. desiredOvercommit := make(map[hv1.ResourceName]float64) for _, mapping := range c.config.OvercommitMappings { - // Skip mappings without a trait specified. - if mapping.HasTrait == nil { - continue - } - // Only consider applied traits which are reflected in the status. - if !slices.Contains(obj.Status.Traits, *mapping.HasTrait) { - continue - } - log.Info("Applying overcommit mapping for trait present on hypervisor", - "trait", *mapping.HasTrait, "overcommit", mapping.Overcommit) - maps.Copy(desiredOvercommit, mapping.Overcommit) - } - for _, mapping := range c.config.OvercommitMappings { - // Skip mappings without a trait specified. - if mapping.HasntTrait == nil { + var applyMapping bool + switch { + // These are mutually exclusive. + case mapping.HasTrait != nil: + applyMapping = slices.Contains(obj.Status.Traits, *mapping.HasTrait) + case mapping.HasntTrait != nil: + applyMapping = !slices.Contains(obj.Status.Traits, *mapping.HasntTrait) + default: + // This should never happen due to validation, but we check it just in case. + log.Info("Skipping overcommit mapping with no trait specified", + "overcommit", mapping.Overcommit) continue } - // Only consider applied traits which are reflected in the status. - if slices.Contains(obj.Status.Traits, *mapping.HasntTrait) { + if !applyMapping { continue } - log.Info("Applying overcommit mapping for trait not present on hypervisor", - "trait", *mapping.HasntTrait, "overcommit", mapping.Overcommit) + log.Info("Applying overcommit mapping on hypervisor", + "overcommit", mapping.Overcommit) maps.Copy(desiredOvercommit, mapping.Overcommit) } log.Info("Desired overcommit ratios based on traits", diff --git a/internal/scheduling/nova/hypervisor_overcommit_controller_test.go b/internal/scheduling/nova/hypervisor_overcommit_controller_test.go index 188a84f44..391fe356c 100644 --- a/internal/scheduling/nova/hypervisor_overcommit_controller_test.go +++ b/internal/scheduling/nova/hypervisor_overcommit_controller_test.go @@ -22,27 +22,32 @@ import ( ) func TestHypervisorOvercommitMapping_Validate(t *testing.T) { + gpuTrait := "CUSTOM_GPU" + standardTrait := "CUSTOM_STANDARD" + tests := []struct { name string mapping HypervisorOvercommitMapping expectError bool }{ { - name: "valid overcommit ratios", + name: "valid overcommit ratios with HasTrait", mapping: HypervisorOvercommitMapping{ Overcommit: map[hv1.ResourceName]float64{ hv1.ResourceCPU: 2.0, hv1.ResourceMemory: 1.5, }, + HasTrait: &gpuTrait, }, expectError: false, }, { - name: "valid minimum overcommit ratio of 1.0", + name: "valid minimum overcommit ratio of 1.0 with HasntTrait", mapping: HypervisorOvercommitMapping{ Overcommit: map[hv1.ResourceName]float64{ hv1.ResourceCPU: 1.0, }, + HasntTrait: &gpuTrait, }, expectError: false, }, @@ -52,6 +57,7 @@ func TestHypervisorOvercommitMapping_Validate(t *testing.T) { Overcommit: map[hv1.ResourceName]float64{ hv1.ResourceCPU: 0.5, }, + HasTrait: &gpuTrait, }, expectError: true, }, @@ -61,6 +67,7 @@ func TestHypervisorOvercommitMapping_Validate(t *testing.T) { Overcommit: map[hv1.ResourceName]float64{ hv1.ResourceMemory: 0.0, }, + HasTrait: &gpuTrait, }, expectError: true, }, @@ -70,6 +77,7 @@ func TestHypervisorOvercommitMapping_Validate(t *testing.T) { Overcommit: map[hv1.ResourceName]float64{ hv1.ResourceCPU: -1.0, }, + HasTrait: &gpuTrait, }, expectError: true, }, @@ -94,6 +102,27 @@ func TestHypervisorOvercommitMapping_Validate(t *testing.T) { hv1.ResourceCPU: 2.0, hv1.ResourceMemory: 0.5, // invalid }, + HasTrait: &gpuTrait, + }, + expectError: true, + }, + { + name: "invalid: both HasTrait and HasntTrait set", + mapping: HypervisorOvercommitMapping{ + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 2.0, + }, + HasTrait: &gpuTrait, + HasntTrait: &standardTrait, + }, + expectError: true, + }, + { + name: "invalid: neither HasTrait nor HasntTrait set with non-empty overcommit", + mapping: HypervisorOvercommitMapping{ + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 2.0, + }, }, expectError: true, }, @@ -113,7 +142,8 @@ func TestHypervisorOvercommitMapping_Validate(t *testing.T) { } func TestHypervisorOvercommitConfig_Validate(t *testing.T) { - trait := "CUSTOM_GPU" + gpuTrait := "CUSTOM_GPU" + standardTrait := "CUSTOM_STANDARD" tests := []struct { name string config HypervisorOvercommitConfig @@ -127,7 +157,7 @@ func TestHypervisorOvercommitConfig_Validate(t *testing.T) { Overcommit: map[hv1.ResourceName]float64{ hv1.ResourceCPU: 2.0, }, - HasTrait: &trait, + HasTrait: &gpuTrait, }, }, }, @@ -141,24 +171,54 @@ func TestHypervisorOvercommitConfig_Validate(t *testing.T) { Overcommit: map[hv1.ResourceName]float64{ hv1.ResourceCPU: 2.0, }, - HasTrait: &trait, + HasTrait: &gpuTrait, }, { Overcommit: map[hv1.ResourceName]float64{ hv1.ResourceMemory: 1.5, }, + HasntTrait: &standardTrait, }, }, }, expectError: false, }, { - name: "invalid config with bad mapping", + name: "invalid config with bad overcommit ratio", + config: HypervisorOvercommitConfig{ + OvercommitMappings: []HypervisorOvercommitMapping{ + { + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 0.5, // invalid ratio + }, + HasTrait: &gpuTrait, + }, + }, + }, + expectError: true, + }, + { + name: "invalid config with both HasTrait and HasntTrait", config: HypervisorOvercommitConfig{ OvercommitMappings: []HypervisorOvercommitMapping{ { Overcommit: map[hv1.ResourceName]float64{ - hv1.ResourceCPU: 0.5, // invalid + hv1.ResourceCPU: 2.0, + }, + HasTrait: &gpuTrait, + HasntTrait: &standardTrait, + }, + }, + }, + expectError: true, + }, + { + name: "invalid config with neither HasTrait nor HasntTrait", + config: HypervisorOvercommitConfig{ + OvercommitMappings: []HypervisorOvercommitMapping{ + { + Overcommit: map[hv1.ResourceName]float64{ + hv1.ResourceCPU: 2.0, }, }, },