diff --git a/api/external/nova/messages.go b/api/external/nova/messages.go index 789cd17d4..15c652ba4 100644 --- a/api/external/nova/messages.go +++ b/api/external/nova/messages.go @@ -9,6 +9,7 @@ import ( "log/slog" "strings" + "github.com/cobaltcore-dev/cortex/api/v1alpha1" "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" ) @@ -126,23 +127,21 @@ func (req ExternalSchedulerRequest) GetHypervisorType() (HypervisorType, error) return "", errors.New("hypervisor type not specified in flavor extra specs") } -type RequestIntent string - const ( // LiveMigrationIntent indicates that the request is intended for live migration. - LiveMigrationIntent RequestIntent = "live_migration" + LiveMigrationIntent v1alpha1.SchedulingIntent = "live_migration" // RebuildIntent indicates that the request is intended for rebuilding a VM. - RebuildIntent RequestIntent = "rebuild" + RebuildIntent v1alpha1.SchedulingIntent = "rebuild" // ResizeIntent indicates that the request is intended for resizing a VM. - ResizeIntent RequestIntent = "resize" + ResizeIntent v1alpha1.SchedulingIntent = "resize" // EvacuateIntent indicates that the request is intended for evacuating a VM. - EvacuateIntent RequestIntent = "evacuate" + EvacuateIntent v1alpha1.SchedulingIntent = "evacuate" // CreateIntent indicates that the request is intended for creating a new VM. - CreateIntent RequestIntent = "create" + CreateIntent v1alpha1.SchedulingIntent = "create" ) // GetIntent analyzes the request spec and determines the intent of the scheduling request. -func (req ExternalSchedulerRequest) GetIntent() (RequestIntent, error) { +func (req ExternalSchedulerRequest) GetIntent() (v1alpha1.SchedulingIntent, error) { str, err := req.Spec.Data.GetSchedulerHintStr("_nova_check_type") if err != nil { return "", err diff --git a/api/external/nova/messages_test.go b/api/external/nova/messages_test.go index 1802c5a13..13040da11 100644 --- a/api/external/nova/messages_test.go +++ b/api/external/nova/messages_test.go @@ -6,13 +6,15 @@ package api import ( "encoding/json" "testing" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" ) func TestGetIntent(t *testing.T) { tests := []struct { name string schedulerHints map[string]any - expectedIntent RequestIntent + expectedIntent v1alpha1.SchedulingIntent expectError bool }{ { diff --git a/api/v1alpha1/decision_types.go b/api/v1alpha1/decision_types.go index c3f02de1e..0dbd6418d 100644 --- a/api/v1alpha1/decision_types.go +++ b/api/v1alpha1/decision_types.go @@ -9,6 +9,15 @@ import ( runtime "k8s.io/apimachinery/pkg/runtime" ) +// SchedulingIntent defines the intent of a scheduling decision. +type SchedulingIntent string + +// Other intents can be defined by the operators. +const ( + // Used as default intent if the operator does not specify one. + SchedulingIntentUnknown SchedulingIntent = "Unknown" +) + type DecisionSpec struct { // SchedulingDomain defines in which scheduling domain this decision // was or is processed (e.g., nova, cinder, manila). @@ -39,6 +48,10 @@ type DecisionSpec struct { // If the type is "pod", this field contains the pod reference. // +kubebuilder:validation:Optional PodRef *corev1.ObjectReference `json:"podRef,omitempty"` + + // The intent of the scheduling decision (e.g., initial scheduling, rescheduling, etc.). + // +kubebuilder:validation:Optional + Intent SchedulingIntent `json:"intent,omitempty"` } type StepResult struct { @@ -110,7 +123,12 @@ type DecisionStatus struct { // +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].status" // +kubebuilder:selectablefield:JSONPath=".spec.resourceID" -// Decision is the Schema for the decisions API +// Currently the Decision CRD is an in-memory scheduling object used by the external scheduler API +// and filter-weigher pipelines to compute a placement result. It is currently NOT persisted +// to etcd — the scheduling outcome is recorded in the History CRD instead. +// The long-term shape of this CRD is still under discussion; until that is +// settled the Decision serves only as a transient carrier within a single +// scheduling run. type Decision struct { metav1.TypeMeta `json:",inline"` diff --git a/api/v1alpha1/history_types.go b/api/v1alpha1/history_types.go new file mode 100644 index 000000000..df5e3a65d --- /dev/null +++ b/api/v1alpha1/history_types.go @@ -0,0 +1,127 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package v1alpha1 + +import ( + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +type SchedulingHistoryEntry struct { + // The timestamp of when the decision was made. + Timestamp metav1.Time `json:"timestamp"` + // The pipeline that was used for the decision. + PipelineRef corev1.ObjectReference `json:"pipelineRef"` + // The intent of the decision (e.g., initial scheduling, rescheduling, etc.). + Intent SchedulingIntent `json:"intent"` + // The top hosts ordered by score for the decision (limited to 3). + // This is not a complete list of all candidates — only the highest-ranked + // hosts are retained to keep the history compact. + // +kubebuilder:validation:Optional + // +kubebuilder:validation:MaxItems=3 + OrderedHosts []string `json:"orderedHosts,omitempty"` + // Whether the scheduling decision was successful. + // +kubebuilder:validation:Optional + Successful bool `json:"successful"` +} + +const ( + // The scheduling decision is ready (a host was successfully selected). + HistoryConditionReady = "Ready" + // The scheduling decision selected a target host. + HistoryReasonSchedulingSucceeded = "SchedulingSucceeded" + // The pipeline run failed before a host could be selected. + HistoryReasonPipelineRunFailed = "PipelineRunFailed" + // The pipeline completed but no suitable host was found. + HistoryReasonNoHostFound = "NoHostFound" +) + +type HistorySpec struct { + // The scheduling domain this object with the history belongs to. + SchedulingDomain SchedulingDomain `json:"schedulingDomain"` + // The resource ID this history belongs to (e.g., the UUID of a nova instance). + ResourceID string `json:"resourceID"` + // The availability zone of the resource, if known. Only set for scheduling + // domains that provide AZ information (e.g., Nova). + // +kubebuilder:validation:Optional + AvailabilityZone *string `json:"availabilityZone,omitempty"` +} + +// CurrentDecision holds the full context of the most recent scheduling +// decision. When a new decision arrives the previous CurrentDecision is +// compacted into a SchedulingHistoryEntry and appended to History. +type CurrentDecision struct { + // The timestamp of when the decision was made. + Timestamp metav1.Time `json:"timestamp"` + // The pipeline that was used for the decision. + PipelineRef corev1.ObjectReference `json:"pipelineRef"` + // The intent of the decision (e.g., initial scheduling, rescheduling, etc.). + Intent SchedulingIntent `json:"intent"` + // Whether the scheduling decision was successful. + Successful bool `json:"successful"` + // The target host selected for the resource. nil when no host was found. + // +kubebuilder:validation:Optional + TargetHost *string `json:"targetHost,omitempty"` + // A human-readable explanation of the scheduling decision. + // +kubebuilder:validation:Optional + Explanation string `json:"explanation,omitempty"` + // The top hosts ordered by score (limited to 3). + // +kubebuilder:validation:Optional + // +kubebuilder:validation:MaxItems=3 + OrderedHosts []string `json:"orderedHosts,omitempty"` +} + +type HistoryStatus struct { + // Current represents the latest scheduling decision with full context. + // +kubebuilder:validation:Optional + Current CurrentDecision `json:"current,omitempty"` + // History of past scheduling decisions (limited to last 10). + // +kubebuilder:validation:Optional + History []SchedulingHistoryEntry `json:"history,omitempty"` + + // Conditions represent the latest available observations of the history's state. + // +kubebuilder:validation:Optional + Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"` +} + +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +kubebuilder:resource:scope=Cluster +// +kubebuilder:printcolumn:name="Domain",type="string",JSONPath=".spec.schedulingDomain" +// +kubebuilder:printcolumn:name="Resource ID",type="string",JSONPath=".spec.resourceID" +// +kubebuilder:printcolumn:name="AZ",type="string",JSONPath=".spec.availabilityZone" +// +kubebuilder:printcolumn:name="Target Host",type="string",JSONPath=".status.current.targetHost" +// +kubebuilder:printcolumn:name="Status",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].reason" +// +kubebuilder:printcolumn:name="Created",type="date",JSONPath=".metadata.creationTimestamp" + +// The history is a CRD that provides a record of past scheduling decisions for a given resource (e.g., a nova instance). +// A new history entry is created for each scheduling decision, and the most recent decision is stored in the status.current field. The history is capped at 10 entries to prevent unbounded growth. +// This CRD is designed to be used by an operations team to troubleshoot scheduling decisions and understand the context around why a particular host was selected (or not selected) for a resource. +type History struct { + metav1.TypeMeta `json:",inline"` + + // Standard object metadata. + // +optional + metav1.ObjectMeta `json:"metadata,omitempty"` + + // Spec defines the desired state of History. + // +required + Spec HistorySpec `json:"spec"` + // Status defines the observed state of History. + // +optional + Status HistoryStatus `json:"status,omitempty"` +} + +// +kubebuilder:object:root=true + +// HistoryList contains a list of History +type HistoryList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []History `json:"items"` +} + +func init() { + SchemeBuilder.Register(&History{}, &HistoryList{}) +} diff --git a/api/v1alpha1/pipeline_types.go b/api/v1alpha1/pipeline_types.go index 47c069184..180db85e5 100644 --- a/api/v1alpha1/pipeline_types.go +++ b/api/v1alpha1/pipeline_types.go @@ -78,10 +78,10 @@ type PipelineSpec struct { // +kubebuilder:validation:Optional Description string `json:"description,omitempty"` - // If this pipeline should create decision objects. + // If this pipeline should create history objects. // When this is false, the pipeline will still process requests. // +kubebuilder:default=false - CreateDecisions bool `json:"createDecisions,omitempty"` + CreateHistory bool `json:"createHistory,omitempty"` // If this pipeline should ignore host preselection and gather all // available placement candidates before applying filters, instead of diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index d187be943..778c91710 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -97,6 +97,33 @@ func (in *CommittedResourceReservationStatus) DeepCopy() *CommittedResourceReser return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CurrentDecision) DeepCopyInto(out *CurrentDecision) { + *out = *in + in.Timestamp.DeepCopyInto(&out.Timestamp) + out.PipelineRef = in.PipelineRef + if in.TargetHost != nil { + in, out := &in.TargetHost, &out.TargetHost + *out = new(string) + **out = **in + } + if in.OrderedHosts != nil { + in, out := &in.OrderedHosts, &out.OrderedHosts + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CurrentDecision. +func (in *CurrentDecision) DeepCopy() *CurrentDecision { + if in == nil { + return nil + } + out := new(CurrentDecision) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Datasource) DeepCopyInto(out *Datasource) { *out = *in @@ -582,6 +609,115 @@ func (in *FilterSpec) DeepCopy() *FilterSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *History) DeepCopyInto(out *History) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new History. +func (in *History) DeepCopy() *History { + if in == nil { + return nil + } + out := new(History) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *History) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *HistoryList) DeepCopyInto(out *HistoryList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]History, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HistoryList. +func (in *HistoryList) DeepCopy() *HistoryList { + if in == nil { + return nil + } + out := new(HistoryList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *HistoryList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *HistorySpec) DeepCopyInto(out *HistorySpec) { + *out = *in + if in.AvailabilityZone != nil { + in, out := &in.AvailabilityZone, &out.AvailabilityZone + *out = new(string) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HistorySpec. +func (in *HistorySpec) DeepCopy() *HistorySpec { + if in == nil { + return nil + } + out := new(HistorySpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *HistoryStatus) DeepCopyInto(out *HistoryStatus) { + *out = *in + in.Current.DeepCopyInto(&out.Current) + if in.History != nil { + in, out := &in.History, &out.History + *out = make([]SchedulingHistoryEntry, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]metav1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HistoryStatus. +func (in *HistoryStatus) DeepCopy() *HistoryStatus { + if in == nil { + return nil + } + out := new(HistoryStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *IdentityDatasource) DeepCopyInto(out *IdentityDatasource) { *out = *in @@ -1294,6 +1430,28 @@ func (in *ReservationStatus) DeepCopy() *ReservationStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SchedulingHistoryEntry) DeepCopyInto(out *SchedulingHistoryEntry) { + *out = *in + in.Timestamp.DeepCopyInto(&out.Timestamp) + out.PipelineRef = in.PipelineRef + if in.OrderedHosts != nil { + in, out := &in.OrderedHosts, &out.OrderedHosts + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingHistoryEntry. +func (in *SchedulingHistoryEntry) DeepCopy() *SchedulingHistoryEntry { + if in == nil { + return nil + } + out := new(SchedulingHistoryEntry) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *StepResult) DeepCopyInto(out *StepResult) { *out = *in diff --git a/cmd/main.go b/cmd/main.go index 001bf5240..52eebbadc 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -42,8 +42,9 @@ import ( "github.com/cobaltcore-dev/cortex/internal/knowledge/extractor" "github.com/cobaltcore-dev/cortex/internal/knowledge/kpis" "github.com/cobaltcore-dev/cortex/internal/scheduling/cinder" - "github.com/cobaltcore-dev/cortex/internal/scheduling/explanation" + "github.com/cobaltcore-dev/cortex/internal/scheduling/external" + schedulinglib "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" "github.com/cobaltcore-dev/cortex/internal/scheduling/machines" "github.com/cobaltcore-dev/cortex/internal/scheduling/manila" @@ -471,20 +472,7 @@ func main() { os.Exit(1) } } - if slices.Contains(mainConfig.EnabledControllers, "explanation-controller") { - setupLog.Info("enabling controller", "controller", "explanation-controller") - // Setup a controller which will reconcile the history and explanation for - // decision resources. - explanationControllerConfig := conf.GetConfigOrDie[explanation.ControllerConfig]() - explanationController := &explanation.Controller{ - Client: multiclusterClient, - Config: explanationControllerConfig, - } - if err := explanationController.SetupWithManager(mgr, multiclusterClient); err != nil { - setupLog.Error(err, "unable to create controller", "controller", "ExplanationController") - os.Exit(1) - } - } + if slices.Contains(mainConfig.EnabledControllers, "committed-resource-reservations-controller") { setupLog.Info("enabling controller", "controller", "committed-resource-reservations-controller") monitor := reservations.NewMonitor(multiclusterClient) @@ -689,48 +677,48 @@ func main() { os.Exit(1) } } - if slices.Contains(mainConfig.EnabledTasks, "nova-decisions-cleanup-task") { - setupLog.Info("starting nova decisions cleanup task") - decisionsCleanupConfig := conf.GetConfigOrDie[nova.DecisionsCleanupConfig]() + if slices.Contains(mainConfig.EnabledTasks, "nova-history-cleanup-task") { + setupLog.Info("starting nova history cleanup task") + historyCleanupConfig := conf.GetConfigOrDie[nova.HistoryCleanupConfig]() if err := (&task.Runner{ Client: multiclusterClient, Interval: time.Hour, - Name: "nova-decisions-cleanup-task", + Name: "nova-history-cleanup-task", Run: func(ctx context.Context) error { - return nova.DecisionsCleanup(ctx, multiclusterClient, decisionsCleanupConfig) + return nova.HistoryCleanup(ctx, multiclusterClient, historyCleanupConfig) }, }).SetupWithManager(mgr); err != nil { - setupLog.Error(err, "unable to add nova decisions cleanup task to manager") + setupLog.Error(err, "unable to add nova history cleanup task to manager") os.Exit(1) } } - if slices.Contains(mainConfig.EnabledTasks, "manila-decisions-cleanup-task") { - setupLog.Info("starting manila decisions cleanup task") - decisionsCleanupConfig := conf.GetConfigOrDie[manila.DecisionsCleanupConfig]() + if slices.Contains(mainConfig.EnabledTasks, "manila-history-cleanup-task") { + setupLog.Info("starting manila history cleanup task") + historyCleanupConfig := conf.GetConfigOrDie[manila.HistoryCleanupConfig]() if err := (&task.Runner{ Client: multiclusterClient, Interval: time.Hour, - Name: "manila-decisions-cleanup-task", + Name: "manila-history-cleanup-task", Run: func(ctx context.Context) error { - return manila.DecisionsCleanup(ctx, multiclusterClient, decisionsCleanupConfig) + return manila.HistoryCleanup(ctx, multiclusterClient, historyCleanupConfig) }, }).SetupWithManager(mgr); err != nil { - setupLog.Error(err, "unable to add manila decisions cleanup task to manager") + setupLog.Error(err, "unable to add manila history cleanup task to manager") os.Exit(1) } } - if slices.Contains(mainConfig.EnabledTasks, "cinder-decisions-cleanup-task") { - setupLog.Info("starting cinder decisions cleanup task") - decisionsCleanupConfig := conf.GetConfigOrDie[cinder.DecisionsCleanupConfig]() + if slices.Contains(mainConfig.EnabledTasks, "cinder-history-cleanup-task") { + setupLog.Info("starting cinder history cleanup task") + historyCleanupConfig := conf.GetConfigOrDie[cinder.HistoryCleanupConfig]() if err := (&task.Runner{ Client: multiclusterClient, Interval: time.Hour, - Name: "cinder-decisions-cleanup-task", + Name: "cinder-history-cleanup-task", Run: func(ctx context.Context) error { - return cinder.DecisionsCleanup(ctx, multiclusterClient, decisionsCleanupConfig) + return cinder.HistoryCleanup(ctx, multiclusterClient, historyCleanupConfig) }, }).SetupWithManager(mgr); err != nil { - setupLog.Error(err, "unable to add cinder decisions cleanup task to manager") + setupLog.Error(err, "unable to add cinder history cleanup task to manager") os.Exit(1) } } diff --git a/helm/bundles/cortex-cinder/values.yaml b/helm/bundles/cortex-cinder/values.yaml index 0635d6ba9..b3853af04 100644 --- a/helm/bundles/cortex-cinder/values.yaml +++ b/helm/bundles/cortex-cinder/values.yaml @@ -75,6 +75,8 @@ cortex: &cortex gvks: - cortex.cloud/v1alpha1/Decision - cortex.cloud/v1alpha1/DecisionList + - cortex.cloud/v1alpha1/History + - cortex.cloud/v1alpha1/HistoryList - cortex.cloud/v1alpha1/Descheduling - cortex.cloud/v1alpha1/DeschedulingList - cortex.cloud/v1alpha1/Pipeline @@ -115,9 +117,8 @@ cortex-scheduling-controllers: component: cinder-scheduling enabledControllers: - cinder-decisions-pipeline-controller - - explanation-controller enabledTasks: - - cinder-decisions-cleanup-task + - cinder-history-cleanup-task cortex-knowledge-controllers: <<: *cortex diff --git a/helm/bundles/cortex-ironcore/templates/pipelines.yaml b/helm/bundles/cortex-ironcore/templates/pipelines.yaml index 5598e60da..a77991b00 100644 --- a/helm/bundles/cortex-ironcore/templates/pipelines.yaml +++ b/helm/bundles/cortex-ironcore/templates/pipelines.yaml @@ -8,7 +8,7 @@ spec: description: | This pipeline is used to schedule ironcore machines onto machinepools. type: filter-weigher - createDecisions: true + createHistory: false filters: [] weighers: - name: noop diff --git a/helm/bundles/cortex-ironcore/values.yaml b/helm/bundles/cortex-ironcore/values.yaml index 4d7a76404..5017ef0bf 100644 --- a/helm/bundles/cortex-ironcore/values.yaml +++ b/helm/bundles/cortex-ironcore/values.yaml @@ -35,6 +35,8 @@ cortex: gvks: - cortex.cloud/v1alpha1/Decision - cortex.cloud/v1alpha1/DecisionList + - cortex.cloud/v1alpha1/History + - cortex.cloud/v1alpha1/HistoryList - cortex.cloud/v1alpha1/Descheduling - cortex.cloud/v1alpha1/DeschedulingList - cortex.cloud/v1alpha1/Pipeline @@ -56,7 +58,6 @@ cortex: - v1/Secret enabledControllers: - ironcore-decisions-pipeline-controller - - explanation-controller monitoring: labels: github_org: cobaltcore-dev diff --git a/helm/bundles/cortex-manila/values.yaml b/helm/bundles/cortex-manila/values.yaml index 2203fcb52..e6be31d4b 100644 --- a/helm/bundles/cortex-manila/values.yaml +++ b/helm/bundles/cortex-manila/values.yaml @@ -75,6 +75,8 @@ cortex: &cortex gvks: - cortex.cloud/v1alpha1/Decision - cortex.cloud/v1alpha1/DecisionList + - cortex.cloud/v1alpha1/History + - cortex.cloud/v1alpha1/HistoryList - cortex.cloud/v1alpha1/Descheduling - cortex.cloud/v1alpha1/DeschedulingList - cortex.cloud/v1alpha1/Pipeline @@ -115,9 +117,8 @@ cortex-scheduling-controllers: component: manila-scheduling enabledControllers: - manila-decisions-pipeline-controller - - explanation-controller enabledTasks: - - manila-decisions-cleanup-task + - manila-history-cleanup-task cortex-knowledge-controllers: <<: *cortex diff --git a/helm/bundles/cortex-nova/templates/pipelines.yaml b/helm/bundles/cortex-nova/templates/pipelines.yaml index c8aac119b..e1abb1969 100644 --- a/helm/bundles/cortex-nova/templates/pipelines.yaml +++ b/helm/bundles/cortex-nova/templates/pipelines.yaml @@ -14,7 +14,7 @@ spec: Specifically, this pipeline is used for general purpose workloads. type: filter-weigher - createDecisions: false + createHistory: false filters: [] weighers: - name: vmware_binpack @@ -73,7 +73,7 @@ spec: Specifically, this pipeline is used for HANA workloads. type: filter-weigher - createDecisions: false + createHistory: false filters: [] weighers: - name: vmware_binpack diff --git a/helm/bundles/cortex-nova/templates/pipelines_kvm.yaml b/helm/bundles/cortex-nova/templates/pipelines_kvm.yaml index 0f96ce18a..865906e15 100644 --- a/helm/bundles/cortex-nova/templates/pipelines_kvm.yaml +++ b/helm/bundles/cortex-nova/templates/pipelines_kvm.yaml @@ -16,7 +16,7 @@ spec: type: filter-weigher # Fetch all placement candidates, ignoring nova's preselection. ignorePreselection: true - createDecisions: false + createHistory: true filters: - name: filter_correct_az description: | @@ -142,7 +142,7 @@ spec: type: filter-weigher # Fetch all placement candidates, ignoring nova's preselection. ignorePreselection: true - createDecisions: false + createHistory: true filters: - name: filter_correct_az description: | @@ -265,7 +265,7 @@ spec: This is the pipeline used for KVM hypervisors (qemu and cloud-hypervisor). type: filter-weigher - createDecisions: false + createHistory: true # Fetch all placement candidates, ignoring nova's preselection. ignorePreselection: true filters: @@ -382,7 +382,7 @@ spec: compute hosts in order to optimize resource usage and performance. This is the pipeline used for KVM hypervisors (qemu and cloud-hypervisor). type: detector - createDecisions: false + createHistory: true detectors: - name: avoid_high_steal_pct description: | @@ -406,7 +406,7 @@ spec: Use case: When a VM needs failover protection and there's an existing reservation on a host, this pipeline validates the host is still suitable for the VM. type: filter-weigher - createDecisions: false + createHistory: false filters: - name: filter_host_instructions description: | @@ -470,7 +470,7 @@ spec: that the reservation host can still accommodate all allocated VMs. If validation fails for any VM, the reservation is deleted (nack). type: filter-weigher - createDecisions: false + createHistory: false filters: - name: filter_host_instructions description: | diff --git a/helm/bundles/cortex-nova/values.yaml b/helm/bundles/cortex-nova/values.yaml index c17b5b169..a1105f945 100644 --- a/helm/bundles/cortex-nova/values.yaml +++ b/helm/bundles/cortex-nova/values.yaml @@ -85,6 +85,8 @@ cortex: &cortex gvks: - cortex.cloud/v1alpha1/Decision - cortex.cloud/v1alpha1/DecisionList + - cortex.cloud/v1alpha1/History + - cortex.cloud/v1alpha1/HistoryList - cortex.cloud/v1alpha1/Descheduling - cortex.cloud/v1alpha1/DeschedulingList - cortex.cloud/v1alpha1/Pipeline @@ -132,11 +134,10 @@ cortex-scheduling-controllers: - nova-pipeline-controllers - nova-deschedulings-executor - hypervisor-overcommit-controller - - explanation-controller - committed-resource-reservations-controller - failover-reservations-controller enabledTasks: - - nova-decisions-cleanup-task + - nova-history-cleanup-task # CommittedResourceFlavorGroupPipelines maps flavor group IDs to pipeline names for CR reservations # This allows different scheduling strategies per flavor group (e.g., HANA vs GP) committedResourceFlavorGroupPipelines: diff --git a/helm/bundles/cortex-pods/templates/pipelines.yaml b/helm/bundles/cortex-pods/templates/pipelines.yaml index 0dd3babd0..edf59daa2 100644 --- a/helm/bundles/cortex-pods/templates/pipelines.yaml +++ b/helm/bundles/cortex-pods/templates/pipelines.yaml @@ -8,7 +8,7 @@ spec: description: | This pipeline is used to schedule pods onto nodes. type: filter-weigher - createDecisions: true + createHistory: false filters: - name: noop description: | diff --git a/helm/bundles/cortex-pods/values.yaml b/helm/bundles/cortex-pods/values.yaml index 5ed4a94f9..fb4efaca1 100644 --- a/helm/bundles/cortex-pods/values.yaml +++ b/helm/bundles/cortex-pods/values.yaml @@ -35,6 +35,8 @@ cortex: gvks: - cortex.cloud/v1alpha1/Decision - cortex.cloud/v1alpha1/DecisionList + - cortex.cloud/v1alpha1/History + - cortex.cloud/v1alpha1/HistoryList - cortex.cloud/v1alpha1/Descheduling - cortex.cloud/v1alpha1/DeschedulingList - cortex.cloud/v1alpha1/Pipeline @@ -53,7 +55,6 @@ cortex: - v1/Binding enabledControllers: - pods-decisions-pipeline-controller - - explanation-controller monitoring: labels: github_org: cobaltcore-dev diff --git a/helm/library/cortex/files/crds/cortex.cloud_histories.yaml b/helm/library/cortex/files/crds/cortex.cloud_histories.yaml new file mode 100644 index 000000000..73e257e8a --- /dev/null +++ b/helm/library/cortex/files/crds/cortex.cloud_histories.yaml @@ -0,0 +1,297 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.20.0 + name: histories.cortex.cloud +spec: + group: cortex.cloud + names: + kind: History + listKind: HistoryList + plural: histories + singular: history + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .spec.schedulingDomain + name: Domain + type: string + - jsonPath: .spec.resourceID + name: Resource ID + type: string + - jsonPath: .spec.availabilityZone + name: AZ + type: string + - jsonPath: .status.current.targetHost + name: Target Host + type: string + - jsonPath: .status.conditions[?(@.type=='Ready')].reason + name: Status + type: string + - jsonPath: .metadata.creationTimestamp + name: Created + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: History is the Schema for the history API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: Spec defines the desired state of History. + properties: + availabilityZone: + description: |- + The availability zone of the resource, if known. Only set for scheduling + domains that provide AZ information (e.g., Nova). + type: string + resourceID: + description: The resource ID this history belongs to (e.g., the UUID + of a nova instance). + type: string + schedulingDomain: + description: The scheduling domain this object with the history belongs + to. + type: string + required: + - resourceID + - schedulingDomain + type: object + status: + description: Status defines the observed state of History. + properties: + conditions: + description: Conditions represent the latest available observations + of the history's state. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + current: + description: Current represents the latest scheduling decision with + full context. + properties: + explanation: + description: A human-readable explanation of the scheduling decision. + type: string + intent: + description: The intent of the decision (e.g., initial scheduling, + rescheduling, etc.). + type: string + orderedHosts: + description: The top hosts ordered by score (limited to 3). + items: + type: string + maxItems: 3 + type: array + pipelineRef: + description: The pipeline that was used for the decision. + properties: + apiVersion: + description: API version of the referent. + type: string + fieldPath: + description: |- + If referring to a piece of an object instead of an entire object, this string + should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. + For example, if the object reference is to a container within a pod, this would take on a value like: + "spec.containers{name}" (where "name" refers to the name of the container that triggered + the event) or if no container name is specified "spec.containers[2]" (container with + index 2 in this pod). This syntax is chosen only to have some well-defined way of + referencing a part of an object. + type: string + kind: + description: |- + Kind of the referent. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + namespace: + description: |- + Namespace of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ + type: string + resourceVersion: + description: |- + Specific resourceVersion to which this reference is made, if any. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency + type: string + uid: + description: |- + UID of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids + type: string + type: object + x-kubernetes-map-type: atomic + successful: + description: Whether the scheduling decision was successful. + type: boolean + targetHost: + description: The target host selected for the resource. nil when + no host was found. + type: string + timestamp: + description: The timestamp of when the decision was made. + format: date-time + type: string + required: + - intent + - pipelineRef + - successful + - timestamp + type: object + history: + description: History of past scheduling decisions (limited to last + 10). + items: + properties: + intent: + description: The intent of the decision (e.g., initial scheduling, + rescheduling, etc.). + type: string + orderedHosts: + description: |- + The top hosts ordered by score for the decision (limited to 3). + This is not a complete list of all candidates — only the highest-ranked + hosts are retained to keep the history compact. + items: + type: string + maxItems: 3 + type: array + pipelineRef: + description: The pipeline that was used for the decision. + properties: + apiVersion: + description: API version of the referent. + type: string + fieldPath: + description: |- + If referring to a piece of an object instead of an entire object, this string + should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. + For example, if the object reference is to a container within a pod, this would take on a value like: + "spec.containers{name}" (where "name" refers to the name of the container that triggered + the event) or if no container name is specified "spec.containers[2]" (container with + index 2 in this pod). This syntax is chosen only to have some well-defined way of + referencing a part of an object. + type: string + kind: + description: |- + Kind of the referent. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + namespace: + description: |- + Namespace of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ + type: string + resourceVersion: + description: |- + Specific resourceVersion to which this reference is made, if any. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency + type: string + uid: + description: |- + UID of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids + type: string + type: object + x-kubernetes-map-type: atomic + successful: + description: Whether the scheduling decision was successful. + type: boolean + timestamp: + description: The timestamp of when the decision was made. + format: date-time + type: string + required: + - intent + - pipelineRef + - timestamp + type: object + type: array + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + status: {} diff --git a/helm/library/cortex/files/crds/cortex.cloud_pipelines.yaml b/helm/library/cortex/files/crds/cortex.cloud_pipelines.yaml index 60b84a75d..2b2a08392 100644 --- a/helm/library/cortex/files/crds/cortex.cloud_pipelines.yaml +++ b/helm/library/cortex/files/crds/cortex.cloud_pipelines.yaml @@ -58,10 +58,10 @@ spec: spec: description: spec defines the desired state of Pipeline properties: - createDecisions: + createHistory: default: false description: |- - If this pipeline should create decision objects. + If this pipeline should create history objects. When this is false, the pipeline will still process requests. type: boolean description: diff --git a/helm/library/cortex/templates/rbac/role.yaml b/helm/library/cortex/templates/rbac/role.yaml index 1f30375f3..440de5e31 100644 --- a/helm/library/cortex/templates/rbac/role.yaml +++ b/helm/library/cortex/templates/rbac/role.yaml @@ -17,6 +17,7 @@ rules: - deschedulings - pipelines - kpis + - histories verbs: - create - delete @@ -35,6 +36,7 @@ rules: - deschedulings/finalizers - pipelines/finalizers - kpis/finalizers + - histories/finalizers verbs: - update - apiGroups: @@ -47,6 +49,7 @@ rules: - deschedulings/status - pipelines/status - kpis/status + - histories/status verbs: - get - patch diff --git a/internal/scheduling/cinder/external_scheduler_api.go b/internal/scheduling/cinder/external_scheduler_api.go index f40ec3bae..7169228f4 100644 --- a/internal/scheduling/cinder/external_scheduler_api.go +++ b/internal/scheduling/cinder/external_scheduler_api.go @@ -149,6 +149,7 @@ func (httpAPI *httpAPI) CinderExternalScheduler(w http.ResponseWriter, r *http.R }, ResourceID: "", // TODO CinderRaw: &raw, + Intent: v1alpha1.SchedulingIntentUnknown, }, } ctx := r.Context() diff --git a/internal/scheduling/cinder/filter_weigher_pipeline_controller.go b/internal/scheduling/cinder/filter_weigher_pipeline_controller.go index 6deabaf8d..7163cc5a6 100644 --- a/internal/scheduling/cinder/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/cinder/filter_weigher_pipeline_controller.go @@ -78,12 +78,6 @@ func (c *FilterWeigherPipelineController) ProcessNewDecisionFromAPI(ctx context. if !ok { return fmt.Errorf("pipeline %s not configured", decision.Spec.PipelineRef.Name) } - if pipelineConf.Spec.CreateDecisions { - if err := c.Create(ctx, decision); err != nil { - return err - } - } - old := decision.DeepCopy() err := c.process(ctx, decision) if err != nil { meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ @@ -100,10 +94,9 @@ func (c *FilterWeigherPipelineController) ProcessNewDecisionFromAPI(ctx context. Message: "pipeline run succeeded", }) } - if pipelineConf.Spec.CreateDecisions { - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - return err + if pipelineConf.Spec.CreateHistory { + if upsertErr := c.HistoryManager.CreateOrUpdateHistory(ctx, decision, nil, err); upsertErr != nil { + ctrl.LoggerFrom(ctx).Error(upsertErr, "failed to create/update history") } } return err @@ -155,6 +148,7 @@ func (c *FilterWeigherPipelineController) InitPipeline( func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { c.Initializer = c c.SchedulingDomain = v1alpha1.SchedulingDomainCinder + c.HistoryManager = lib.HistoryClient{Client: mcl, Recorder: mgr.GetEventRecorder("cortex-cinder-scheduler")} if err := mgr.Add(manager.RunnableFunc(c.InitAllPipelines)); err != nil { return err } diff --git a/internal/scheduling/cinder/filter_weigher_pipeline_controller_test.go b/internal/scheduling/cinder/filter_weigher_pipeline_controller_test.go index dadd74e70..e51dfec87 100644 --- a/internal/scheduling/cinder/filter_weigher_pipeline_controller_test.go +++ b/internal/scheduling/cinder/filter_weigher_pipeline_controller_test.go @@ -7,6 +7,7 @@ import ( "context" "encoding/json" "testing" + "time" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" @@ -247,13 +248,13 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) } tests := []struct { - name string - decision *v1alpha1.Decision - pipelineConfig *v1alpha1.Pipeline - createDecisions bool - expectError bool - expectDecisionCreated bool - expectResult bool + name string + decision *v1alpha1.Decision + pipelineConfig *v1alpha1.Pipeline + createHistory bool + expectError bool + expectHistoryCreated bool + expectResult bool }{ { name: "successful decision processing with creation", @@ -264,6 +265,7 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) }, Spec: v1alpha1.DecisionSpec{ SchedulingDomain: v1alpha1.SchedulingDomainCinder, + ResourceID: "test-uuid-1", PipelineRef: corev1.ObjectReference{ Name: "test-pipeline", }, @@ -279,15 +281,15 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainCinder, - CreateDecisions: true, + CreateHistory: true, Filters: []v1alpha1.FilterSpec{}, Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: true, - expectError: false, - expectDecisionCreated: true, - expectResult: true, + createHistory: true, + expectError: false, + expectHistoryCreated: true, + expectResult: true, }, { name: "successful decision processing without creation", @@ -313,15 +315,15 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainCinder, - CreateDecisions: false, + CreateHistory: false, Filters: []v1alpha1.FilterSpec{}, Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: false, - expectError: false, - expectDecisionCreated: false, - expectResult: true, + createHistory: false, + expectError: false, + expectHistoryCreated: false, + expectResult: true, }, { name: "pipeline not configured", @@ -340,10 +342,10 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) }, }, }, - pipelineConfig: nil, - expectError: true, - expectDecisionCreated: false, - expectResult: false, + pipelineConfig: nil, + expectError: true, + expectHistoryCreated: false, + expectResult: false, }, { name: "decision without cinderRaw spec", @@ -367,15 +369,15 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainCinder, - CreateDecisions: true, + CreateHistory: true, Filters: []v1alpha1.FilterSpec{}, Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: true, - expectError: true, - expectDecisionCreated: false, - expectResult: false, + createHistory: true, + expectError: true, + expectHistoryCreated: true, + expectResult: false, }, } @@ -389,7 +391,7 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) client := fake.NewClientBuilder(). WithScheme(scheme). WithObjects(objects...). - WithStatusSubresource(&v1alpha1.Decision{}). + WithStatusSubresource(&v1alpha1.Decision{}, &v1alpha1.History{}). Build() controller := &FilterWeigherPipelineController{ @@ -397,6 +399,7 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Client: client, Pipelines: make(map[string]lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]), PipelineConfigs: make(map[string]v1alpha1.Pipeline), + HistoryManager: lib.HistoryClient{Client: client}, }, Monitor: lib.FilterWeigherPipelineMonitor{}, } @@ -419,41 +422,23 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) t.Errorf("Expected no error but got: %v", err) } - // Check if decision was created (if expected) - if tt.expectDecisionCreated { - var decisions v1alpha1.DecisionList - err := client.List(context.Background(), &decisions) - if err != nil { - t.Errorf("Failed to list decisions: %v", err) - return - } - - found := false - for _, decision := range decisions.Items { - if decision.Spec.SchedulingDomain == v1alpha1.SchedulingDomainCinder { - found = true - - // Verify decision properties - if decision.Spec.PipelineRef.Name != "test-pipeline" { - t.Errorf("expected pipeline ref %q, got %q", "test-pipeline", decision.Spec.PipelineRef.Name) - } - - // Check if result was set - if tt.expectResult { - if decision.Status.Result == nil { - t.Error("expected decision result to be set") - return - } - } + // Check if history CRD was created when expected + if tt.expectHistoryCreated { + var histories v1alpha1.HistoryList + deadline := time.Now().Add(2 * time.Second) + for { + if err := client.List(context.Background(), &histories); err != nil { + t.Fatalf("Failed to list histories: %v", err) + } + if len(histories.Items) > 0 { break } - } - - if !found { - t.Error("expected decision to be created but was not found") + if time.Now().After(deadline) { + t.Fatal("timed out waiting for history CRD to be created") + } + time.Sleep(5 * time.Millisecond) } } else if !tt.expectError { - // For cases without creation, check that the decision has the right status if tt.expectResult && tt.decision.Status.Result == nil { t.Error("expected decision result to be set in original decision object") } diff --git a/internal/scheduling/cinder/decisions_cleanup.go b/internal/scheduling/cinder/history_cleanup.go similarity index 76% rename from internal/scheduling/cinder/decisions_cleanup.go rename to internal/scheduling/cinder/history_cleanup.go index ffe6119dc..647c3426a 100644 --- a/internal/scheduling/cinder/decisions_cleanup.go +++ b/internal/scheduling/cinder/history_cleanup.go @@ -19,15 +19,15 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) -type DecisionsCleanupConfig struct { +type HistoryCleanupConfig struct { // Secret ref to keystone credentials stored in a k8s secret. KeystoneSecretRef corev1.SecretReference `json:"keystoneSecretRef"` // Secret ref to SSO credentials stored in a k8s secret, if applicable. SSOSecretRef *corev1.SecretReference `json:"ssoSecretRef"` } -// Delete all decisions for cinder volumes that have been deleted. -func DecisionsCleanup(ctx context.Context, client client.Client, conf DecisionsCleanupConfig) error { +// Delete all history entries for cinder volumes that have been deleted. +func HistoryCleanup(ctx context.Context, client client.Client, conf HistoryCleanupConfig) error { var authenticatedHTTP = http.DefaultClient if conf.SSOSecretRef != nil { var err error @@ -106,23 +106,23 @@ func DecisionsCleanup(ctx context.Context, client client.Client, conf DecisionsC volumesByID[volume.ID] = struct{}{} } - // List all decisions and delete those whose volume no longer exists. - decisionList := &v1alpha1.DecisionList{} - if err := client.List(ctx, decisionList); err != nil { + // List all history and delete those whose volume no longer exists. + historyList := &v1alpha1.HistoryList{} + if err := client.List(ctx, historyList); err != nil { return err } - for _, decision := range decisionList.Items { - // Skip non-cinder decisions. - if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainCinder { + for _, history := range historyList.Items { + // Skip non-cinder history entries. + if history.Spec.SchedulingDomain != v1alpha1.SchedulingDomainCinder { continue } - // Skip decisions for which the volume still exists. - if _, ok := volumesByID[decision.Spec.ResourceID]; ok { + // Skip history entries for which the volume still exists. + if _, ok := volumesByID[history.Spec.ResourceID]; ok { continue } - // Delete the decision since the volume no longer exists. - slog.Info("deleting decision for deleted volume", "decision", decision.Name, "volumeID", decision.Spec.ResourceID) - if err := client.Delete(ctx, &decision); err != nil { + // Delete the history entry since the volume no longer exists. + slog.Info("deleting history entry for deleted volume", "history", history.Name, "volumeID", history.Spec.ResourceID) + if err := client.Delete(ctx, &history); err != nil { return err } } diff --git a/internal/scheduling/cinder/decisions_cleanup_test.go b/internal/scheduling/cinder/history_cleanup_test.go similarity index 84% rename from internal/scheduling/cinder/decisions_cleanup_test.go rename to internal/scheduling/cinder/history_cleanup_test.go index 570320ae5..5ec40c724 100644 --- a/internal/scheduling/cinder/decisions_cleanup_test.go +++ b/internal/scheduling/cinder/history_cleanup_test.go @@ -32,7 +32,7 @@ func TestCleanupCinder(t *testing.T) { tests := []struct { name string - decisions []v1alpha1.Decision + histories []v1alpha1.History expectError bool authError bool endpointError bool @@ -43,45 +43,45 @@ func TestCleanupCinder(t *testing.T) { }{ { name: "handle authentication error", - decisions: []v1alpha1.Decision{}, + histories: []v1alpha1.History{}, authError: true, expectError: true, }, { name: "handle endpoint error", - decisions: []v1alpha1.Decision{}, + histories: []v1alpha1.History{}, endpointError: true, expectError: true, }, { name: "handle server error", - decisions: []v1alpha1.Decision{}, + histories: []v1alpha1.History{}, mockServerError: true, expectError: true, }, { name: "handle empty volumes case", - decisions: []v1alpha1.Decision{}, + histories: []v1alpha1.History{}, emptyVolumesError: true, expectError: false, }, { - name: "delete decisions for non-existent volumes", - decisions: []v1alpha1.Decision{ + name: "delete history for non-existent volumes", + histories: []v1alpha1.History{ { ObjectMeta: metav1.ObjectMeta{ - Name: "decision-existing-volume", + Name: "history-existing-volume", }, - Spec: v1alpha1.DecisionSpec{ + Spec: v1alpha1.HistorySpec{ SchedulingDomain: v1alpha1.SchedulingDomainCinder, ResourceID: "volume-exists", }, }, { ObjectMeta: metav1.ObjectMeta{ - Name: "decision-deleted-volume", + Name: "history-deleted-volume", }, - Spec: v1alpha1.DecisionSpec{ + Spec: v1alpha1.HistorySpec{ SchedulingDomain: v1alpha1.SchedulingDomainCinder, ResourceID: "volume-deleted", }, @@ -90,26 +90,26 @@ func TestCleanupCinder(t *testing.T) { mockVolumes: []mockVolume{ {ID: "volume-exists"}, }, - expectedDeleted: []string{"decision-deleted-volume"}, + expectedDeleted: []string{"history-deleted-volume"}, expectError: false, }, { - name: "keep decisions for existing volumes", - decisions: []v1alpha1.Decision{ + name: "keep history for existing volumes", + histories: []v1alpha1.History{ { ObjectMeta: metav1.ObjectMeta{ - Name: "decision-volume-1", + Name: "history-volume-1", }, - Spec: v1alpha1.DecisionSpec{ + Spec: v1alpha1.HistorySpec{ SchedulingDomain: v1alpha1.SchedulingDomainCinder, ResourceID: "volume-1", }, }, { ObjectMeta: metav1.ObjectMeta{ - Name: "decision-volume-2", + Name: "history-volume-2", }, - Spec: v1alpha1.DecisionSpec{ + Spec: v1alpha1.HistorySpec{ SchedulingDomain: v1alpha1.SchedulingDomainCinder, ResourceID: "volume-2", }, @@ -126,9 +126,9 @@ func TestCleanupCinder(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - objects := make([]client.Object, len(tt.decisions)) - for i := range tt.decisions { - objects[i] = &tt.decisions[i] + objects := make([]client.Object, len(tt.histories)) + for i := range tt.histories { + objects[i] = &tt.histories[i] } // Create mock Cinder server first @@ -281,13 +281,13 @@ func TestCleanupCinder(t *testing.T) { WithScheme(scheme). WithObjects(objects...). Build() - config := DecisionsCleanupConfig{ + config := HistoryCleanupConfig{ KeystoneSecretRef: corev1.SecretReference{ Name: "keystone-secret", Namespace: "default", }, } - err := DecisionsCleanup(context.Background(), client, config) + err := HistoryCleanup(context.Background(), client, config) if tt.expectError && err == nil { t.Error("Expected error but got none") @@ -297,32 +297,32 @@ func TestCleanupCinder(t *testing.T) { } if !tt.expectError { - // Verify expected decisions were deleted + // Verify expected history entries were deleted for _, expectedDeleted := range tt.expectedDeleted { - var decision v1alpha1.Decision + var history v1alpha1.History err := client.Get(context.Background(), - types.NamespacedName{Name: expectedDeleted}, &decision) + types.NamespacedName{Name: expectedDeleted}, &history) if err == nil { - t.Errorf("Expected decision %s to be deleted but it still exists", expectedDeleted) + t.Errorf("Expected history %s to be deleted but it still exists", expectedDeleted) } } - // Verify other decisions still exist - for _, originalDecision := range tt.decisions { + // Verify other history entries still exist + for _, originalHistory := range tt.histories { shouldBeDeleted := false for _, expectedDeleted := range tt.expectedDeleted { - if originalDecision.Name == expectedDeleted { + if originalHistory.Name == expectedDeleted { shouldBeDeleted = true break } } if !shouldBeDeleted { - var decision v1alpha1.Decision + var history v1alpha1.History err := client.Get(context.Background(), - types.NamespacedName{Name: originalDecision.Name}, &decision) + types.NamespacedName{Name: originalHistory.Name}, &history) if err != nil { - t.Errorf("Expected decision %s to still exist but got error: %v", - originalDecision.Name, err) + t.Errorf("Expected history %s to still exist but got error: %v", + originalHistory.Name, err) } } } @@ -331,7 +331,7 @@ func TestCleanupCinder(t *testing.T) { } } -func TestCleanupCinderDecisionsCancel(t *testing.T) { +func TestCleanupCinderHistoryCancel(t *testing.T) { scheme := runtime.NewScheme() if err := v1alpha1.AddToScheme(scheme); err != nil { t.Fatalf("Failed to add scheme: %v", err) @@ -363,7 +363,7 @@ func TestCleanupCinderDecisionsCancel(t *testing.T) { WithObjects(objects...). Build() - config := DecisionsCleanupConfig{ + config := HistoryCleanupConfig{ KeystoneSecretRef: corev1.SecretReference{ Name: "keystone-secret", Namespace: "default", @@ -374,7 +374,7 @@ func TestCleanupCinderDecisionsCancel(t *testing.T) { defer cancel() // This should exit quickly due to context cancellation - if err := DecisionsCleanup(ctx, client, config); err != nil { + if err := HistoryCleanup(ctx, client, config); err != nil { if !errors.Is(err, context.DeadlineExceeded) { t.Errorf("Unexpected error during cleanup: %v", err) } diff --git a/internal/scheduling/explanation/controller.go b/internal/scheduling/explanation/controller.go deleted file mode 100644 index edb8ab3f6..000000000 --- a/internal/scheduling/explanation/controller.go +++ /dev/null @@ -1,226 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package explanation - -import ( - "context" - "sort" - - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "github.com/cobaltcore-dev/cortex/pkg/multicluster" - corev1 "k8s.io/api/core/v1" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/handler" - "sigs.k8s.io/controller-runtime/pkg/manager" - "sigs.k8s.io/controller-runtime/pkg/predicate" -) - -type ControllerConfig struct { - // The controller will scope to objects using this scheduling domain name. - // This allows multiple controllers to coexist in the same cluster without - // interfering with each other's decisions. - SchedulingDomain v1alpha1.SchedulingDomain `json:"schedulingDomain"` -} - -// The explanation controller populates two fields of the decision status. -// -// First, it reconstructs the history of each decision. It will look for -// previous decisions for the same resource (based on ResourceID) and provide -// them through the decision history field. -// -// Second, it will use the available context for a decision to generate a -// human-readable explanation of why the decision was made the way it was. -// This explanation is intended to help operators understand the reasoning -// behind scheduling decisions. -type Controller struct { - // The kubernetes client to use for processing decisions. - client.Client - // Config for the controller. - Config ControllerConfig - // If the field indexing should be skipped (useful for testing). - SkipIndexFields bool -} - -// Check if a decision should be processed by this controller. -func (c *Controller) shouldReconcileDecision(decision *v1alpha1.Decision) bool { - // Ignore decisions not created by this operator. - if decision.Spec.SchedulingDomain != c.Config.SchedulingDomain { - return false - } - // Ignore decisions that already have an explanation. - if decision.Status.Explanation != "" { - return false - } - // Ignore decisions that have no result yet. - if decision.Status.Result == nil { - return false - } - return true -} - -// This loop will be called by the controller-runtime for each decision -// resource that needs to be reconciled. -func (c *Controller) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - log := ctrl.LoggerFrom(ctx) - decision := &v1alpha1.Decision{} - if err := c.Get(ctx, req.NamespacedName, decision); err != nil { - log.Error(err, "failed to get decision", "name", req.NamespacedName) - return ctrl.Result{}, client.IgnoreNotFound(err) - } - // Reconcile the history. - if err := c.reconcileHistory(ctx, decision); err != nil { - return ctrl.Result{}, err - } - // Reconcile the explanation. - if err := c.reconcileExplanation(ctx, decision); err != nil { - return ctrl.Result{}, err - } - log.Info("successfully reconciled decision explanation", "name", req.NamespacedName) - return ctrl.Result{}, nil -} - -// Process the history for the given decision. -func (c *Controller) reconcileHistory(ctx context.Context, decision *v1alpha1.Decision) error { - log := ctrl.LoggerFrom(ctx) - // Get all previous decisions for the same ResourceID. - var previousDecisions v1alpha1.DecisionList - if c.SkipIndexFields { - // When field indexing is skipped, list all decisions and filter manually - if err := c.List(ctx, &previousDecisions); err != nil { - log.Error(err, "failed to list all decisions", "resourceID", decision.Spec.ResourceID) - return err - } - // Filter to only decisions with matching ResourceID - var filteredDecisions []v1alpha1.Decision - for _, prevDecision := range previousDecisions.Items { - if prevDecision.Spec.ResourceID == decision.Spec.ResourceID { - filteredDecisions = append(filteredDecisions, prevDecision) - } - } - previousDecisions.Items = filteredDecisions - } else { - // Use field indexing for efficient lookup - if err := c.List(ctx, &previousDecisions, client.MatchingFields{"spec.resourceID": decision.Spec.ResourceID}); err != nil { - log.Error(err, "failed to list previous decisions", "resourceID", decision.Spec.ResourceID) - return err - } - } - history := []corev1.ObjectReference{} // Not var-init so we see the empty slice. - // Make sure the resulting history will be in chronological order. - sort.Slice(previousDecisions.Items, func(i, j int) bool { - t1 := previousDecisions.Items[i].CreationTimestamp - t2 := previousDecisions.Items[j].CreationTimestamp - return t1.Before(&t2) - }) - for _, prevDecision := range previousDecisions.Items { - // Skip the current decision. - if prevDecision.Name == decision.Name && prevDecision.Namespace == decision.Namespace { - continue - } - // Skip decisions that were made after the current one. - if prevDecision.CreationTimestamp.After(decision.CreationTimestamp.Time) { - continue - } - history = append(history, corev1.ObjectReference{ - Kind: "Decision", - Namespace: prevDecision.Namespace, - Name: prevDecision.Name, - UID: prevDecision.UID, - }) - } - old := decision.DeepCopy() - decision.Status.History = &history - precedence := len(history) - decision.Status.Precedence = &precedence - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - log.Error(err, "failed to patch decision status with history", "name", decision.Name) - return err - } - log.Info("successfully reconciled decision history", "name", decision.Name) - return nil -} - -// Process the explanation for the given decision. -func (c *Controller) reconcileExplanation(ctx context.Context, decision *v1alpha1.Decision) error { - log := ctrl.LoggerFrom(ctx) - explainer, err := NewExplainer(c.Client) - if err != nil { - log.Error(err, "failed to create explainer", "name", decision.Name) - return err - } - explanationText, err := explainer.Explain(ctx, decision) - if err != nil { - log.Error(err, "failed to explain decision", "name", decision.Name) - return err - } - old := decision.DeepCopy() - decision.Status.Explanation = explanationText - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - log.Error(err, "failed to patch decision status with explanation", "name", decision.Name) - return err - } - log.Info("successfully reconciled decision explanation", "name", decision.Name) - return nil -} - -// This function will be called when the manager starts up. Must block. -func (c *Controller) StartupCallback(ctx context.Context) error { - // Reprocess all existing decisions that need an explanation. - var decisions v1alpha1.DecisionList - if err := c.List(ctx, &decisions); err != nil { - return err - } - for _, decision := range decisions.Items { - if !c.shouldReconcileDecision(&decision) { - continue - } - if _, err := c.Reconcile(ctx, ctrl.Request{ - NamespacedName: client.ObjectKey{ - Namespace: decision.Namespace, - Name: decision.Name, - }, - }); err != nil { - return err - } - } - return nil -} - -// This function sets up the controller with the provided manager. -func (c *Controller) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { - if !c.SkipIndexFields { - ctx := context.Background() - obj := &v1alpha1.Decision{} - lst := &v1alpha1.DecisionList{} - idx := "spec.resourceID" - fnc := func(obj client.Object) []string { - decision := obj.(*v1alpha1.Decision) - return []string{decision.Spec.ResourceID} - } - if err := mcl.IndexField(ctx, obj, lst, idx, fnc); err != nil { - return err - } - } - if err := mgr.Add(manager.RunnableFunc(c.StartupCallback)); err != nil { - return err - } - bldr := multicluster.BuildController(mcl, mgr) - // Watch decision changes across all clusters. - bldr, err := bldr.WatchesMulticluster( - &v1alpha1.Decision{}, - &handler.EnqueueRequestForObject{}, - predicate.NewPredicateFuncs(func(obj client.Object) bool { - decision := obj.(*v1alpha1.Decision) - return c.shouldReconcileDecision(decision) - }), - ) - if err != nil { - return err - } - return bldr.Named("explanation-controller"). - Complete(c) -} diff --git a/internal/scheduling/explanation/controller_test.go b/internal/scheduling/explanation/controller_test.go deleted file mode 100644 index f287b4995..000000000 --- a/internal/scheduling/explanation/controller_test.go +++ /dev/null @@ -1,589 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package explanation - -import ( - "context" - "testing" - "time" - - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client/fake" -) - -func TestController_shouldReconcileDecision(t *testing.T) { - controller := &Controller{ - Config: ControllerConfig{SchedulingDomain: v1alpha1.SchedulingDomainNova}, - } - - tests := []struct { - name string - decision *v1alpha1.Decision - expected bool - }{ - { - name: "should reconcile nova decision without explanation", - decision: &v1alpha1.Decision{ - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - }, - Status: v1alpha1.DecisionStatus{ - Explanation: "", - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), - }, - }, - }, - expected: true, - }, - { - name: "should not reconcile decision from different operator", - decision: &v1alpha1.Decision{ - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: "different-operator", - }, - Status: v1alpha1.DecisionStatus{ - Explanation: "", - }, - }, - expected: false, - }, - { - name: "should not reconcile decision with existing explanation", - decision: &v1alpha1.Decision{ - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - }, - Status: v1alpha1.DecisionStatus{ - Explanation: "Already has explanation", - }, - }, - expected: false, - }, - { - name: "should not reconcile non-nova decision", - decision: &v1alpha1.Decision{ - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - }, - Status: v1alpha1.DecisionStatus{ - Explanation: "", - }, - }, - expected: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result := controller.shouldReconcileDecision(tt.decision) - if result != tt.expected { - t.Errorf("shouldReconcileDecision() = %v, expected %v", result, tt.expected) - } - }) - } -} - -func TestController_Reconcile(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - tests := []struct { - name string - decision *v1alpha1.Decision - existingDecisions []v1alpha1.Decision - expectError bool - expectRequeue bool - expectedExplanation string - expectedHistoryLength int - }{ - { - name: "decision not found", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "nonexistent-decision", - Namespace: "default", - }, - }, - expectError: false, // controller-runtime ignores not found errors - }, - { - name: "reconcile decision without history", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource-1", - }, - Status: v1alpha1.DecisionStatus{}, - }, - expectedExplanation: "Initial placement of the nova server", - expectedHistoryLength: 0, - }, - { - name: "reconcile decision with history", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-2", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now().Add(time.Hour)}, - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource-2", - }, - Status: v1alpha1.DecisionStatus{}, - }, - existingDecisions: []v1alpha1.Decision{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-1", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now()}, - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource-2", - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), - }, - }, - }, - }, - expectedHistoryLength: 1, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - var objects []runtime.Object - if tt.name != "decision not found" { - objects = append(objects, tt.decision) - } - for i := range tt.existingDecisions { - objects = append(objects, &tt.existingDecisions[i]) - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(objects...). - WithStatusSubresource(&v1alpha1.Decision{}). - Build() - - controller := &Controller{ - Client: client, - Config: ControllerConfig{SchedulingDomain: v1alpha1.SchedulingDomainNova}, - SkipIndexFields: true, // Skip field indexing for testing - } - - req := ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: tt.decision.Name, - Namespace: tt.decision.Namespace, - }, - } - - result, err := controller.Reconcile(context.Background(), req) - - if tt.expectError && err == nil { - t.Errorf("Expected error but got none") - return - } - if !tt.expectError && err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - if tt.expectRequeue && result.RequeueAfter == 0 { - t.Errorf("Expected requeue but got none") - } - if !tt.expectRequeue && result.RequeueAfter > 0 { - t.Errorf("Expected no requeue but got %v", result.RequeueAfter) - } - - // Only check results if we expect the decision to exist - if tt.name != "decision not found" { - // Verify the decision was updated - var updated v1alpha1.Decision - err = client.Get(context.Background(), req.NamespacedName, &updated) - if err != nil { - t.Errorf("Failed to get updated decision: %v", err) - return - } - - if tt.expectedExplanation != "" && !contains(updated.Status.Explanation, tt.expectedExplanation) { - t.Errorf("Expected explanation to contain '%s', but got: %s", tt.expectedExplanation, updated.Status.Explanation) - } - - if updated.Status.History != nil && len(*updated.Status.History) != tt.expectedHistoryLength { - t.Errorf("Expected history length %d, got %d", tt.expectedHistoryLength, len(*updated.Status.History)) - } - } - }) - } -} - -func TestController_reconcileHistory(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - tests := []struct { - name string - decision *v1alpha1.Decision - existingDecisions []v1alpha1.Decision - expectedHistory int - expectError bool - }{ - { - name: "no previous decisions", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "test-resource-1", - }, - }, - expectedHistory: 0, - }, - { - name: "one previous decision", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-2", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now().Add(time.Hour)}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "test-resource-2", - }, - }, - existingDecisions: []v1alpha1.Decision{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-1", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now()}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "test-resource-2", - }, - }, - }, - expectedHistory: 1, - }, - { - name: "multiple previous decisions in correct order", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-3", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now().Add(2 * time.Hour)}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "test-resource-3", - }, - }, - existingDecisions: []v1alpha1.Decision{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-1", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now()}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "test-resource-3", - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-2", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now().Add(time.Hour)}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "test-resource-3", - }, - }, - }, - expectedHistory: 2, - }, - { - name: "exclude future decisions", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-2", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now().Add(time.Hour)}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "test-resource-4", - }, - }, - existingDecisions: []v1alpha1.Decision{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-1", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now()}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "test-resource-4", - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-3", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now().Add(2 * time.Hour)}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "test-resource-4", - }, - }, - }, - expectedHistory: 1, // Only test-decision-1 should be included - }, - { - name: "exclude decisions with different ResourceID", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-target", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now().Add(time.Hour)}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "target-resource", - }, - }, - existingDecisions: []v1alpha1.Decision{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-same", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now()}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "target-resource", - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-different", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now()}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "different-resource", - }, - }, - }, - expectedHistory: 1, // Only same ResourceID should be included - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - objects := []runtime.Object{tt.decision} - for i := range tt.existingDecisions { - objects = append(objects, &tt.existingDecisions[i]) - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(objects...). - WithStatusSubresource(&v1alpha1.Decision{}). - Build() - - controller := &Controller{ - Client: client, - Config: ControllerConfig{SchedulingDomain: v1alpha1.SchedulingDomainNova}, - SkipIndexFields: true, // Skip field indexing for testing - } - - err := controller.reconcileHistory(context.Background(), tt.decision) - - if tt.expectError && err == nil { - t.Errorf("Expected error but got none") - return - } - if !tt.expectError && err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - if tt.decision.Status.History == nil { - if tt.expectedHistory != 0 { - t.Errorf("Expected history length %d, got nil", tt.expectedHistory) - } - } else if len(*tt.decision.Status.History) != tt.expectedHistory { - t.Errorf("Expected history length %d, got %d", tt.expectedHistory, len(*tt.decision.Status.History)) - } - }) - } -} - -func TestController_reconcileExplanation(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - decision := &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource", - }, - Status: v1alpha1.DecisionStatus{ - History: nil, - }, - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(decision). - WithStatusSubresource(&v1alpha1.Decision{}). - Build() - - controller := &Controller{ - Client: client, - Config: ControllerConfig{SchedulingDomain: v1alpha1.SchedulingDomainNova}, - } - - err := controller.reconcileExplanation(context.Background(), decision) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - } - - if decision.Status.Explanation == "" { - t.Error("Expected explanation to be set but it was empty") - } - - if !contains(decision.Status.Explanation, "Initial placement of the nova server") { - t.Errorf("Expected explanation to contain nova server text, got: %s", decision.Status.Explanation) - } -} - -func TestController_StartupCallback(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - // Create a decision that should be reconciled - decision1 := &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-1", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource-1", - }, - Status: v1alpha1.DecisionStatus{ - Explanation: "", // Empty explanation means it should be reconciled - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), - }, - }, - } - - // Create a decision that should not be reconciled (already has explanation) - decision2 := &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-2", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource-2", - }, - Status: v1alpha1.DecisionStatus{ - Explanation: "Already has explanation", - }, - } - - // Create a decision from different operator that should not be reconciled - decision3 := &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-3", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: "different-operator", - ResourceID: "test-resource-3", - }, - Status: v1alpha1.DecisionStatus{ - Explanation: "", - }, - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(decision1, decision2, decision3). - WithStatusSubresource(&v1alpha1.Decision{}). - Build() - - controller := &Controller{ - Client: client, - Config: ControllerConfig{SchedulingDomain: v1alpha1.SchedulingDomainNova}, - SkipIndexFields: true, // Skip field indexing for testing - } - - err := controller.StartupCallback(context.Background()) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - } - - // Verify that decision1 now has an explanation - var updated1 v1alpha1.Decision - err = client.Get(context.Background(), types.NamespacedName{Name: "test-decision-1", Namespace: "default"}, &updated1) - if err != nil { - t.Errorf("Failed to get updated decision1: %v", err) - } - - if updated1.Status.Explanation == "" { - t.Error("Expected decision1 to have explanation after startup callback") - } - - // Verify that decision2 explanation remains unchanged - var updated2 v1alpha1.Decision - err = client.Get(context.Background(), types.NamespacedName{Name: "test-decision-2", Namespace: "default"}, &updated2) - if err != nil { - t.Errorf("Failed to get updated decision2: %v", err) - } - - if updated2.Status.Explanation != "Already has explanation" { - t.Errorf("Expected decision2 explanation to remain unchanged, got: %s", updated2.Status.Explanation) - } - - // Verify that decision3 explanation remains empty (different operator) - var updated3 v1alpha1.Decision - err = client.Get(context.Background(), types.NamespacedName{Name: "test-decision-3", Namespace: "default"}, &updated3) - if err != nil { - t.Errorf("Failed to get updated decision3: %v", err) - } - - if updated3.Status.Explanation != "" { - t.Errorf("Expected decision3 explanation to remain empty, got: %s", updated3.Status.Explanation) - } -} diff --git a/internal/scheduling/explanation/explainer.go b/internal/scheduling/explanation/explainer.go deleted file mode 100644 index a5f199fae..000000000 --- a/internal/scheduling/explanation/explainer.go +++ /dev/null @@ -1,729 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package explanation - -import ( - "context" - "fmt" - "sort" - "time" - - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "k8s.io/apimachinery/pkg/api/errors" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/log" -) - -// The explainer gets a scheduling decision and produces a human-readable -// explanation of why the decision was made the way it was. -type Explainer struct { - // The kubernetes client to use for fetching related data. - client.Client - // The template manager to use for rendering explanations. - templateManager *TemplateManager -} - -// NewExplainer creates a new explainer with template support. -func NewExplainer(client client.Client) (*Explainer, error) { - templateManager, err := NewTemplateManager() - if err != nil { - return nil, fmt.Errorf("failed to create template manager: %w", err) - } - - return &Explainer{ - Client: client, - templateManager: templateManager, - }, nil -} - -// Explain the given decision and return a human-readable explanation. -func (e *Explainer) Explain(ctx context.Context, decision *v1alpha1.Decision) (string, error) { - return e.ExplainWithTemplates(ctx, decision) -} - -// getResourceType returns a human-readable resource type. -func (e *Explainer) getResourceType(schedulingDomain v1alpha1.SchedulingDomain) string { - switch schedulingDomain { - case v1alpha1.SchedulingDomainNova: - return "nova server" - case v1alpha1.SchedulingDomainManila: - return "manila share" - case v1alpha1.SchedulingDomainCinder: - return "cinder volume" - case v1alpha1.SchedulingDomainMachines: - return "ironcore machine" - default: - return "resource" - } -} - -// calculateScoreGap calculates the gap between first and second place. -func (e *Explainer) calculateScoreGap(weights map[string]float64) float64 { - if len(weights) < 2 { - return 0.0 - } - - scores := make([]float64, 0, len(weights)) - for _, score := range weights { - scores = append(scores, score) - } - - sort.Slice(scores, func(i, j int) bool { - return scores[i] > scores[j] - }) - - return scores[0] - scores[1] -} - -// fetchDecisionChain retrieves all decisions in the history chain. -func (e *Explainer) fetchDecisionChain(ctx context.Context, decision *v1alpha1.Decision) ([]*v1alpha1.Decision, error) { - var chainDecisions []*v1alpha1.Decision - logger := log.FromContext(ctx) - - // Add all historical decisions - if decision.Status.History != nil { - for _, ref := range *decision.Status.History { - histDecision := &v1alpha1.Decision{} - if err := e.Get(ctx, client.ObjectKey{ - Namespace: ref.Namespace, - Name: ref.Name, - }, histDecision); err != nil { - if errors.IsNotFound(err) { - logger.Info("History decision not found, skipping from chain analysis", - "decision", ref.Name, - "namespace", ref.Namespace, - "uid", ref.UID) - continue // Skip missing decisions instead of failing - } - // For other errors, still fail - return nil, err - } - chainDecisions = append(chainDecisions, histDecision) - } - } - - // Add current decision - chainDecisions = append(chainDecisions, decision) - - return chainDecisions, nil -} - -// HostSegment represents a segment in the host chain with duration and decision count. -type HostSegment struct { - host string - duration time.Duration // Full precision duration - decisions int -} - -// buildHostSegments creates host segments from decisions with durations. -func (e *Explainer) buildHostSegments(decisions []*v1alpha1.Decision) []HostSegment { - if len(decisions) < 2 { - return []HostSegment{} - } - - // Extract host chain - hostChain := make([]string, 0, len(decisions)) - for _, decision := range decisions { - host := "(n/a)" - if decision.Status.Result != nil && decision.Status.Result.TargetHost != nil { - host = *decision.Status.Result.TargetHost - } - hostChain = append(hostChain, host) - } - - // Build segments with durations - segments := make([]HostSegment, 0) - if len(hostChain) > 0 { - currentHost := hostChain[0] - segmentStart := 0 - - for i := 1; i <= len(hostChain); i++ { - // Check if we've reached the end or found a different host - if i == len(hostChain) || hostChain[i] != currentHost { - // Calculate duration for this segment - startTime := decisions[segmentStart].CreationTimestamp.Time - var endTime = startTime // Default to 0 duration for last segment - if i < len(hostChain) { - endTime = decisions[i].CreationTimestamp.Time - } - - duration := endTime.Sub(startTime) - - segments = append(segments, HostSegment{ - host: currentHost, - duration: duration, - decisions: i - segmentStart, - }) - - if i < len(hostChain) { - currentHost = hostChain[i] - segmentStart = i - } - } - } - } - - return segments -} - -// detectLoop checks if there are repeated hosts in the segments. -func (e *Explainer) detectLoop(segments []HostSegment) bool { - seenHosts := make(map[string]bool) - for _, segment := range segments { - if seenHosts[segment.host] { - return true - } - seenHosts[segment.host] = true - } - return false -} - -// findWinner returns the host with the highest score. -func (e *Explainer) findWinner(scores map[string]float64) string { - winner := "" - maxScore := -999999.0 - for host, score := range scores { - if score > maxScore { - maxScore = score - winner = host - } - } - return winner -} - -// ScoreCalculationResult holds both final scores and deleted host tracking information. -type ScoreCalculationResult struct { - FinalScores map[string]float64 - DeletedHosts map[string][]string // host -> list of steps that deleted it -} - -// StepImpact represents the impact of a single pipeline step on the winning host. -type StepImpact struct { - Step string - ScoreBefore float64 - ScoreAfter float64 - ScoreDelta float64 - CompetitorsRemoved int - PromotedToFirst bool -} - -// calculateScoresFromSteps processes step results sequentially to compute final scores and track deleted hosts. -func (e *Explainer) calculateScoresFromSteps(inputWeights map[string]float64, stepResults []v1alpha1.StepResult) ScoreCalculationResult { - if len(inputWeights) == 0 { - return ScoreCalculationResult{ - FinalScores: map[string]float64{}, - DeletedHosts: map[string][]string{}, - } - } - - // Start with input values as initial scores - currentScores := make(map[string]float64) - for hostName, inputValue := range inputWeights { - currentScores[hostName] = inputValue - } - - deletedHosts := make(map[string][]string) - - // Process each step sequentially - for _, stepResult := range stepResults { - // Check which hosts will be deleted in this step - for hostName := range currentScores { - if _, exists := stepResult.Activations[hostName]; !exists { - // Host not in this step's activations - will be deleted - deletedHosts[hostName] = append(deletedHosts[hostName], stepResult.StepName) - } - } - - // Apply activations and remove hosts not in this step - newScores := make(map[string]float64) - for hostName, score := range currentScores { - if activation, exists := stepResult.Activations[hostName]; exists { - // Add activation to current score - newScores[hostName] = score + activation - } - // Hosts not in activations are removed (don't copy to newScores) - } - currentScores = newScores - } - - return ScoreCalculationResult{ - FinalScores: currentScores, - DeletedHosts: deletedHosts, - } -} - -// calculateScoresWithoutStep processes step results while skipping one specific step. -func (e *Explainer) calculateScoresWithoutStep(inputWeights map[string]float64, stepResults []v1alpha1.StepResult, skipIndex int) ScoreCalculationResult { - if len(inputWeights) == 0 || skipIndex < 0 || skipIndex >= len(stepResults) { - return e.calculateScoresFromSteps(inputWeights, stepResults) - } - - // Create reduced step results without the skipped step - reducedSteps := make([]v1alpha1.StepResult, 0, len(stepResults)-1) - reducedSteps = append(reducedSteps, stepResults[:skipIndex]...) - reducedSteps = append(reducedSteps, stepResults[skipIndex+1:]...) - - return e.calculateScoresFromSteps(inputWeights, reducedSteps) -} - -// findCriticalSteps determines which steps change the winning host using backward elimination. -func (e *Explainer) findCriticalSteps(decision *v1alpha1.Decision) []string { - result := decision.Status.Result - if result == nil || len(result.StepResults) == 0 { - return []string{} - } - - // Get input weights (prefer raw, fall back to normalized) - var inputWeights map[string]float64 - switch { - case len(result.RawInWeights) > 0: - inputWeights = result.RawInWeights - case len(result.NormalizedInWeights) > 0: - inputWeights = result.NormalizedInWeights - default: - return []string{} - } - - // Calculate baseline scores with all steps - baselineResult := e.calculateScoresFromSteps(inputWeights, result.StepResults) - baselineWinner := e.findWinner(baselineResult.FinalScores) - - if baselineWinner == "" { - return []string{} - } - - criticalSteps := make([]string, 0) - - // Try removing each step one by one - for i, stepResult := range result.StepResults { - // Calculate scores without this step - reducedResult := e.calculateScoresWithoutStep(inputWeights, result.StepResults, i) - - // Find winner without this step - reducedWinner := e.findWinner(reducedResult.FinalScores) - - // If removing this step changes the winner, it's critical - if reducedWinner != baselineWinner { - criticalSteps = append(criticalSteps, stepResult.StepName) - } - } - - return criticalSteps -} - -func (e *Explainer) calculateStepImpacts(inputWeights map[string]float64, stepResults []v1alpha1.StepResult, targetHost string) []StepImpact { - if len(inputWeights) == 0 || len(stepResults) == 0 { - return []StepImpact{} - } - - impacts := make([]StepImpact, 0, len(stepResults)) - currentScores := make(map[string]float64) - - // Start with input values as initial scores - for hostName, inputValue := range inputWeights { - currentScores[hostName] = inputValue - } - - // Track target host's score before first step - scoreBefore := currentScores[targetHost] - - // Process each pipeline step and track the target host's evolution - for _, stepResult := range stepResults { - // Count how many competitors will be removed in this step - competitorsRemoved := 0 - for hostName := range currentScores { - if hostName != targetHost { - if _, exists := stepResult.Activations[hostName]; !exists { - competitorsRemoved++ - } - } - } - - // Check if target host was #1 before this step - wasFirst := true - targetScoreBefore := currentScores[targetHost] - for host, score := range currentScores { - if host != targetHost && score > targetScoreBefore { - wasFirst = false - break - } - } - - // Apply activations and remove hosts not in this step - newScores := make(map[string]float64) - for hostName, score := range currentScores { - if activation, exists := stepResult.Activations[hostName]; exists { - newScores[hostName] = score + activation - } - // Hosts not in activations are removed (don't copy to newScores) - } - - // Get target host's score after this step - scoreAfter := newScores[targetHost] - - // Check if target host became #1 after this step - isFirstAfter := true - for host, score := range newScores { - if host != targetHost && score > scoreAfter { - isFirstAfter = false - break - } - } - - promotedToFirst := !wasFirst && isFirstAfter - - impacts = append(impacts, StepImpact{ - Step: stepResult.StepName, - ScoreBefore: scoreBefore, - ScoreAfter: scoreAfter, - ScoreDelta: scoreAfter - scoreBefore, - CompetitorsRemoved: competitorsRemoved, - PromotedToFirst: promotedToFirst, - }) - - // Update for next iteration - currentScores = newScores - scoreBefore = scoreAfter - } - - return impacts -} - -// Template data building functions - these functions extract and structure -// decision data into formats suitable for template rendering. - -// buildContextData creates context data for template rendering. -func (e *Explainer) buildContextData(decision *v1alpha1.Decision) ContextData { - resourceType := e.getResourceType(decision.Spec.SchedulingDomain) - - history := decision.Status.History - isInitial := history == nil || len(*history) == 0 - - decisionNumber := 1 - if !isInitial { - decisionNumber = len(*history) + 1 - if decision.Status.Precedence != nil { - decisionNumber = *decision.Status.Precedence + 1 - } - } - - return ContextData{ - ResourceType: resourceType, - DecisionNumber: decisionNumber, - IsInitial: isInitial, - } -} - -// buildHistoryData creates history comparison data for template rendering. -func (e *Explainer) buildHistoryData(ctx context.Context, decision *v1alpha1.Decision) (*HistoryData, error) { - history := decision.Status.History - if history == nil || len(*history) == 0 { - return nil, nil - } - - // Get the last decision - lastDecisionRef := (*history)[len(*history)-1] - lastDecision := &v1alpha1.Decision{} - if err := e.Get(ctx, client.ObjectKey{ - Namespace: lastDecisionRef.Namespace, - Name: lastDecisionRef.Name, - }, lastDecision); err != nil { - logger := log.FromContext(ctx) - if errors.IsNotFound(err) { - logger.Info("History decision not found, skipping history comparison", - "decision", lastDecisionRef.Name, - "namespace", lastDecisionRef.Namespace, - "uid", lastDecisionRef.UID) - return nil, nil // Skip history comparison instead of failing - } - // For other errors, still fail - return nil, err - } - - lastTarget := "(n/a)" - if lastDecision.Status.Result != nil && lastDecision.Status.Result.TargetHost != nil { - lastTarget = *lastDecision.Status.Result.TargetHost - } - - newTarget := "(n/a)" - if decision.Status.Result != nil && decision.Status.Result.TargetHost != nil { - newTarget = *decision.Status.Result.TargetHost - } - - return &HistoryData{ - PreviousTarget: lastTarget, - CurrentTarget: newTarget, - }, nil -} - -// buildWinnerData creates winner analysis data for template rendering. -func (e *Explainer) buildWinnerData(decision *v1alpha1.Decision) *WinnerData { - result := decision.Status.Result - if result == nil || result.TargetHost == nil { - return nil - } - - targetHost := *result.TargetHost - - // Get target host score - targetScore := 0.0 - if result.AggregatedOutWeights != nil { - if score, exists := result.AggregatedOutWeights[targetHost]; exists { - targetScore = score - } - } - - // Count hosts evaluated - hostsEvaluated := len(result.OrderedHosts) - if hostsEvaluated == 0 && result.AggregatedOutWeights != nil { - hostsEvaluated = len(result.AggregatedOutWeights) - } - - // Calculate score gap to second place - gap := e.calculateScoreGap(result.AggregatedOutWeights) - - return &WinnerData{ - HostName: targetHost, - Score: targetScore, - Gap: gap, - HostsEvaluated: hostsEvaluated, - HasGap: gap > 0, - } -} - -// buildInputData creates input comparison data for template rendering. -func (e *Explainer) buildInputData(decision *v1alpha1.Decision) *InputData { - result := decision.Status.Result - if result == nil || result.TargetHost == nil { - return nil - } - - targetHost := *result.TargetHost - - // Get input weights (prefer raw, fall back to normalized) - var inputWeights map[string]float64 - switch { - case len(result.RawInWeights) > 0: - inputWeights = result.RawInWeights - case len(result.NormalizedInWeights) > 0: - inputWeights = result.NormalizedInWeights - default: - return nil - } - - // Find input winner - inputWinner := "" - inputWinnerScore := -999999.0 - for host, score := range inputWeights { - if score > inputWinnerScore { - inputWinnerScore = score - inputWinner = host - } - } - - if inputWinner == "" { - return nil - } - - // Get target host's final score - targetFinalScore := 0.0 - if result.AggregatedOutWeights != nil { - if score, exists := result.AggregatedOutWeights[targetHost]; exists { - targetFinalScore = score - } - } - - return &InputData{ - InputWinner: inputWinner, - InputScore: inputWinnerScore, - FinalWinner: targetHost, - FinalScore: targetFinalScore, - FinalInputScore: inputWeights[targetHost], - InputConfirmed: inputWinner == targetHost, - } -} - -// buildCriticalStepsData creates critical steps data for template rendering. -func (e *Explainer) buildCriticalStepsData(decision *v1alpha1.Decision) *CriticalStepsData { - result := decision.Status.Result - if result == nil || result.TargetHost == nil || len(result.StepResults) == 0 { - return nil - } - - criticalSteps := e.findCriticalSteps(decision) - totalSteps := len(result.StepResults) - - return &CriticalStepsData{ - Steps: criticalSteps, - TotalSteps: totalSteps, - IsInputOnly: len(criticalSteps) == 0, - RequiresAll: len(criticalSteps) == totalSteps, - } -} - -// buildDeletedHostsData creates deleted hosts data for template rendering. -func (e *Explainer) buildDeletedHostsData(decision *v1alpha1.Decision) *DeletedHostsData { - result := decision.Status.Result - if result == nil || result.StepResults == nil || len(result.StepResults) == 0 { - return nil - } - - // Get input weights (prefer raw, fall back to normalized) - var inputWeights map[string]float64 - switch { - case len(result.RawInWeights) > 0: - inputWeights = result.RawInWeights - case len(result.NormalizedInWeights) > 0: - inputWeights = result.NormalizedInWeights - default: - return nil - } - - // Calculate scores and get deleted hosts information - scoreResult := e.calculateScoresFromSteps(inputWeights, result.StepResults) - - if len(scoreResult.DeletedHosts) == 0 { - return nil - } - - // Find input winner - inputWinner := "" - inputWinnerScore := -999999.0 - for host, score := range inputWeights { - if score > inputWinnerScore { - inputWinnerScore = score - inputWinner = host - } - } - - // Build list of deleted hosts - deletedHosts := make([]DeletedHostInfo, 0, len(scoreResult.DeletedHosts)) - for hostName, steps := range scoreResult.DeletedHosts { - deletedHosts = append(deletedHosts, DeletedHostInfo{ - Name: hostName, - Steps: steps, - IsInputWinner: hostName == inputWinner, - }) - } - - return &DeletedHostsData{ - DeletedHosts: deletedHosts, - } -} - -// buildChainData creates chain analysis data for template rendering. -func (e *Explainer) buildChainData(ctx context.Context, decision *v1alpha1.Decision) (*ChainData, error) { - history := decision.Status.History - if history == nil || len(*history) == 0 { - return nil, nil // No chain for initial decisions - } - - // Fetch all decisions in the chain - chainDecisions, err := e.fetchDecisionChain(ctx, decision) - if err != nil { - return nil, err - } - - if len(chainDecisions) < 2 { - return nil, nil // Need at least 2 decisions for a chain - } - - // Build segments - segments := e.buildHostSegments(chainDecisions) - if len(segments) == 0 { - return nil, nil - } - - // Convert to template data format - chainSegments := make([]ChainSegment, len(segments)) - for i, segment := range segments { - chainSegments[i] = ChainSegment{ - Host: segment.host, - Duration: segment.duration, - Decisions: segment.decisions, - } - } - - return &ChainData{ - Segments: chainSegments, - HasLoop: e.detectLoop(segments), - }, nil -} - -// ExplainWithTemplates renders an explanation using Go templates. -func (e *Explainer) ExplainWithTemplates(ctx context.Context, decision *v1alpha1.Decision) (string, error) { - // Build explanation context - explanationCtx := ExplanationContext{ - Context: e.buildContextData(decision), - } - - // Build each component's data - if historyData, err := e.buildHistoryData(ctx, decision); err != nil { - return "", err - } else if historyData != nil { - explanationCtx.History = historyData - } - - if winnerData := e.buildWinnerData(decision); winnerData != nil { - explanationCtx.Winner = winnerData - } - - if inputData := e.buildInputData(decision); inputData != nil { - explanationCtx.Input = inputData - } - - if criticalStepsData := e.buildCriticalStepsData(decision); criticalStepsData != nil { - explanationCtx.CriticalSteps = criticalStepsData - } - - if deletedHostsData := e.buildDeletedHostsData(decision); deletedHostsData != nil { - explanationCtx.DeletedHosts = deletedHostsData - } - - // Build step impacts - if result := decision.Status.Result; result != nil && result.TargetHost != nil && len(result.StepResults) > 0 { - targetHost := *result.TargetHost - var inputWeights map[string]float64 - switch { - case len(result.RawInWeights) > 0: - inputWeights = result.RawInWeights - case len(result.NormalizedInWeights) > 0: - inputWeights = result.NormalizedInWeights - } - if inputWeights != nil { - impacts := e.calculateStepImpacts(inputWeights, result.StepResults, targetHost) - if len(impacts) > 0 { - // Sort impacts by absolute delta (highest first), with promotions taking priority - sort.Slice(impacts, func(i, j int) bool { - absI := impacts[i].ScoreDelta - if absI < 0 { - absI = -absI - } - absJ := impacts[j].ScoreDelta - if absJ < 0 { - absJ = -absJ - } - - if absI != absJ { - return absI > absJ - } - if impacts[i].PromotedToFirst != impacts[j].PromotedToFirst { - return impacts[i].PromotedToFirst - } - return impacts[i].Step < impacts[j].Step - }) - explanationCtx.StepImpacts = impacts - } - } - } - - if chainData, err := e.buildChainData(ctx, decision); err != nil { - return "", err - } else if chainData != nil { - explanationCtx.Chain = chainData - } - - // Render using templates - return e.templateManager.RenderExplanation(explanationCtx) -} diff --git a/internal/scheduling/explanation/explainer_test.go b/internal/scheduling/explanation/explainer_test.go deleted file mode 100644 index ed1d52e13..000000000 --- a/internal/scheduling/explanation/explainer_test.go +++ /dev/null @@ -1,1476 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package explanation - -import ( - "context" - "sort" - "testing" - "time" - - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/controller-runtime/pkg/client/fake" -) - -func TestExplainer_Explain(t *testing.T) { - tests := []struct { - name string - decision *v1alpha1.Decision - historyDecisions []*v1alpha1.Decision - expectedContains []string - expectError bool - }{ - { - name: "initial nova server placement", - decision: WithResourceID(NewTestDecision("test-decision"), "test-resource-1"), - expectedContains: []string{"Initial placement of the nova server"}, - }, - { - name: "initial cinder volume placement", - decision: WithSchedulingDomain(WithResourceID(NewTestDecision("test-decision"), "test-resource-2"), v1alpha1.SchedulingDomainCinder), - expectedContains: []string{"Initial placement of the cinder volume"}, - }, - { - name: "initial manila share placement", - decision: WithSchedulingDomain(WithResourceID(NewTestDecision("test-decision"), "test-resource-3"), v1alpha1.SchedulingDomainManila), - expectedContains: []string{"Initial placement of the manila share"}, - }, - { - name: "initial ironcore machine placement", - decision: WithSchedulingDomain(WithResourceID(NewTestDecision("test-decision"), "test-resource-4"), v1alpha1.SchedulingDomainMachines), - expectedContains: []string{"Initial placement of the ironcore machine"}, - }, - { - name: "unknown resource type falls back to generic", - decision: WithSchedulingDomain(WithResourceID(NewTestDecision("test-decision"), "test-resource-5"), "unknown-type"), - expectedContains: []string{"Initial placement of the resource"}, - }, - { - name: "empty history array", - decision: WithResourceID(NewTestDecision("test-decision"), "test-resource-6"), - expectedContains: []string{"Initial placement of the nova server"}, - }, - { - name: "subsequent decision with history", - decision: WithHistoryRef( - WithTargetHost(WithResourceID(NewTestDecision("test-decision-2"), "test-resource-7"), "host-2"), - WithUID(WithTargetHost(WithResourceID(NewTestDecision("test-decision-1"), "test-resource-7"), "host-1"), "test-uid-1")), - historyDecisions: []*v1alpha1.Decision{ - WithUID(WithTargetHost(WithResourceID(NewTestDecision("test-decision-1"), "test-resource-7"), "host-1"), "test-uid-1"), - }, - expectedContains: []string{ - "Decision #2 for this nova server", - "Previous target host was 'host-1'", - "now it's 'host-2'", - }, - }, - { - name: "subsequent decision with nil target hosts", - decision: WithHistoryRef( - WithResourceID(NewTestDecision("test-decision-4"), "test-resource-8"), - WithUID(WithResourceID(NewTestDecision("test-decision-3"), "test-resource-8"), "test-uid-3")), - historyDecisions: []*v1alpha1.Decision{ - WithUID(WithResourceID(NewTestDecision("test-decision-3"), "test-resource-8"), "test-uid-3"), - }, - expectedContains: []string{ - "Decision #2 for this nova server", - "Previous target host was '(n/a)'", - "now it's '(n/a)'", - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if len(tt.historyDecisions) > 0 { - RunExplanationTestWithHistory(t, tt.decision, tt.historyDecisions, tt.expectedContains) - } else { - RunExplanationTest(t, tt.decision, tt.expectedContains) - } - }) - } -} - -func TestExplainer_Explain_HistoryDecisionNotFound_GracefulHandling(t *testing.T) { - decision := NewDecision("test-decision"). - WithResourceID("test-resource"). - WithTargetHost("host-1"). - WithHistory([]corev1.ObjectReference{ - { - Kind: "Decision", - Namespace: "default", - Name: "non-existent-decision", - UID: "non-existent-uid", - }, - }). - Build() - - explainer := SetupExplainerTest(t, decision) - explanation, err := explainer.Explain(context.Background(), decision) - - // Should NOT error anymore - graceful handling - if err != nil { - t.Errorf("Expected no error with graceful handling, but got: %v", err) - } - - // Should contain context but not history comparison - if !contains(explanation, "Decision #2 for this nova server") { - t.Errorf("Expected explanation to contain context, but got: %s", explanation) - } - - if contains(explanation, "Previous target host") { - t.Errorf("Expected explanation to NOT contain history comparison when decision is missing, but got: %s", explanation) - } -} - -func TestExplainer_MissingHistoryDecisions_ChainAnalysis(t *testing.T) { - // Test that chain analysis works when some history decisions are missing - decision := NewDecision("current-decision"). - WithResourceID("test-resource"). - WithTargetHost("host-3"). - WithHistory([]corev1.ObjectReference{ - {Kind: "Decision", Namespace: "default", Name: "decision-1", UID: "uid-1"}, - {Kind: "Decision", Namespace: "default", Name: "missing-decision", UID: "missing-uid"}, - {Kind: "Decision", Namespace: "default", Name: "decision-3", UID: "uid-3"}, - }). - Build() - - // Only provide decision-1 and decision-3, missing decision-2 - availableDecision := NewDecision("decision-1"). - WithUID("uid-1"). - WithTargetHost("host-1"). - WithCreationTimestamp(time.Now().Add(-2 * time.Hour)). - Build() - - explainer := SetupExplainerTest(t, decision, availableDecision) - explanation, err := explainer.Explain(context.Background(), decision) - - if err != nil { - t.Errorf("Expected no error but got: %v", err) - } - - // Should contain context with full history count - if !contains(explanation, "Decision #4 for this nova server") { - t.Errorf("Expected explanation to contain context, but got: %s", explanation) - } - - // Chain analysis should work with available decisions - if !contains(explanation, "Chain:") { - t.Errorf("Expected explanation to contain chain analysis, but got: %s", explanation) - } -} - -// Helper functions -func stringPtr(s string) *string { - return &s -} - -func contains(s, substr string) bool { - return len(s) >= len(substr) && (s == substr || substr == "" || findInString(s, substr)) -} - -func findInString(s, substr string) bool { - for i := 0; i <= len(s)-len(substr); i++ { - if s[i:i+len(substr)] == substr { - return true - } - } - return false -} - -// Generic Decision Helpers - Composable functions with smart defaults -func NewTestDecision(name string) *v1alpha1.Decision { - return &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: "default", // Sensible default - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, // Most common - ResourceID: "test-resource", // Generic default - }, - Status: v1alpha1.DecisionStatus{}, - } -} - -func WithTargetHost(decision *v1alpha1.Decision, host string) *v1alpha1.Decision { - if decision.Status.Result == nil { - decision.Status.Result = &v1alpha1.DecisionResult{} - } - decision.Status.Result.TargetHost = &host - return decision -} - -func WithInputWeights(decision *v1alpha1.Decision, weights map[string]float64) *v1alpha1.Decision { - if decision.Status.Result == nil { - decision.Status.Result = &v1alpha1.DecisionResult{} - } - decision.Status.Result.RawInWeights = weights - return decision -} - -func WithOutputWeights(decision *v1alpha1.Decision, weights map[string]float64) *v1alpha1.Decision { - if decision.Status.Result == nil { - decision.Status.Result = &v1alpha1.DecisionResult{} - } - decision.Status.Result.AggregatedOutWeights = weights - - // Auto-generate ordered hosts from weights - hosts := make([]string, 0, len(weights)) - for host := range weights { - hosts = append(hosts, host) - } - sort.Slice(hosts, func(i, j int) bool { - return weights[hosts[i]] > weights[hosts[j]] - }) - decision.Status.Result.OrderedHosts = hosts - - return decision -} - -func WithSteps(decision *v1alpha1.Decision, steps ...v1alpha1.StepResult) *v1alpha1.Decision { - if decision.Status.Result == nil { - decision.Status.Result = &v1alpha1.DecisionResult{} - } - decision.Status.Result.StepResults = steps - return decision -} - -func WithSchedulingDomain(decision *v1alpha1.Decision, schedulingDomain v1alpha1.SchedulingDomain) *v1alpha1.Decision { - decision.Spec.SchedulingDomain = schedulingDomain - return decision -} - -func WithResourceID(decision *v1alpha1.Decision, resourceID string) *v1alpha1.Decision { - decision.Spec.ResourceID = resourceID - return decision -} - -func WithUID(decision *v1alpha1.Decision, uid string) *v1alpha1.Decision { - decision.UID = types.UID(uid) - return decision -} - -func WithHistory(decision *v1alpha1.Decision, refs []corev1.ObjectReference) *v1alpha1.Decision { - decision.Status.History = &refs - return decision -} - -// Helper to create a decision with history reference to another decision -func WithHistoryRef(decision, historyDecision *v1alpha1.Decision) *v1alpha1.Decision { - refs := []corev1.ObjectReference{ - { - Kind: "Decision", - Namespace: historyDecision.Namespace, - Name: historyDecision.Name, - UID: historyDecision.UID, - }, - } - decision.Status.History = &refs - return decision -} - -// Generic step creator -func Step(name string, activations map[string]float64) v1alpha1.StepResult { - return v1alpha1.StepResult{ - StepName: name, - Activations: activations, - } -} - -// Common step names as constants -const ( - AvailabilityFilter = "availability-filter" - ResourceWeigher = "resource-weigher" - PlacementPolicy = "placement-policy" -) - -// Decision Builder Pattern - Fluent interface for creating test decisions -type DecisionBuilder struct { - decision *v1alpha1.Decision -} - -func NewDecision(name string) *DecisionBuilder { - return &DecisionBuilder{ - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource", - }, - Status: v1alpha1.DecisionStatus{}, - }, - } -} - -func (b *DecisionBuilder) WithResourceID(resourceID string) *DecisionBuilder { - b.decision.Spec.ResourceID = resourceID - return b -} - -func (b *DecisionBuilder) WithSchedulingDomain(schedulingDomain v1alpha1.SchedulingDomain) *DecisionBuilder { - b.decision.Spec.SchedulingDomain = schedulingDomain - return b -} - -func (b *DecisionBuilder) WithTargetHost(host string) *DecisionBuilder { - if b.decision.Status.Result == nil { - b.decision.Status.Result = &v1alpha1.DecisionResult{} - } - b.decision.Status.Result.TargetHost = stringPtr(host) - return b -} - -func (b *DecisionBuilder) WithRawInputWeights(weights map[string]float64) *DecisionBuilder { - if b.decision.Status.Result == nil { - b.decision.Status.Result = &v1alpha1.DecisionResult{} - } - b.decision.Status.Result.RawInWeights = weights - return b -} - -func (b *DecisionBuilder) WithNormalizedInputWeights(weights map[string]float64) *DecisionBuilder { - if b.decision.Status.Result == nil { - b.decision.Status.Result = &v1alpha1.DecisionResult{} - } - b.decision.Status.Result.NormalizedInWeights = weights - return b -} - -func (b *DecisionBuilder) WithAggregatedOutputWeights(weights map[string]float64) *DecisionBuilder { - if b.decision.Status.Result == nil { - b.decision.Status.Result = &v1alpha1.DecisionResult{} - } - b.decision.Status.Result.AggregatedOutWeights = weights - return b -} - -func (b *DecisionBuilder) WithOrderedHosts(hosts []string) *DecisionBuilder { - if b.decision.Status.Result == nil { - b.decision.Status.Result = &v1alpha1.DecisionResult{} - } - b.decision.Status.Result.OrderedHosts = hosts - return b -} - -func (b *DecisionBuilder) WithSteps(steps ...v1alpha1.StepResult) *DecisionBuilder { - if b.decision.Status.Result == nil { - b.decision.Status.Result = &v1alpha1.DecisionResult{} - } - b.decision.Status.Result.StepResults = steps - return b -} - -func (b *DecisionBuilder) WithHistory(refs []corev1.ObjectReference) *DecisionBuilder { - b.decision.Status.History = &refs - return b -} - -func (b *DecisionBuilder) WithHistoryDecisions(decisions ...*v1alpha1.Decision) *DecisionBuilder { - refs := make([]corev1.ObjectReference, len(decisions)) - for i, decision := range decisions { - refs[i] = corev1.ObjectReference{ - Kind: "Decision", - Namespace: decision.Namespace, - Name: decision.Name, - UID: decision.UID, - } - } - b.decision.Status.History = &refs - return b -} - -func (b *DecisionBuilder) WithPrecedence(precedence int) *DecisionBuilder { - b.decision.Status.Precedence = intPtr(precedence) - return b -} - -func (b *DecisionBuilder) WithUID(uid string) *DecisionBuilder { - b.decision.UID = types.UID(uid) - return b -} - -func (b *DecisionBuilder) WithCreationTimestamp(timestamp time.Time) *DecisionBuilder { - b.decision.CreationTimestamp = metav1.Time{Time: timestamp} - return b -} - -func (b *DecisionBuilder) Build() *v1alpha1.Decision { - return b.decision -} - -// Pre-built scenario helpers for common test patterns -func DecisionWithScoring(name, winner string, scores map[string]float64) *DecisionBuilder { - orderedHosts := make([]string, 0, len(scores)) - for host := range scores { - orderedHosts = append(orderedHosts, host) - } - // Sort by score descending - sort.Slice(orderedHosts, func(i, j int) bool { - return scores[orderedHosts[i]] > scores[orderedHosts[j]] - }) - - return NewDecision(name). - WithTargetHost(winner). - WithAggregatedOutputWeights(scores). - WithOrderedHosts(orderedHosts) -} - -func DecisionWithInputComparison(name, winner string, inputWeights, finalWeights map[string]float64) *DecisionBuilder { - return NewDecision(name). - WithTargetHost(winner). - WithRawInputWeights(inputWeights). - WithAggregatedOutputWeights(finalWeights) -} - -func DecisionWithCriticalSteps(name, winner string, inputWeights map[string]float64, steps ...v1alpha1.StepResult) *DecisionBuilder { - return NewDecision(name). - WithTargetHost(winner). - WithRawInputWeights(inputWeights). - WithSteps(steps...) -} - -func DecisionWithHistory(name, winner string) *DecisionBuilder { - return NewDecision(name). - WithTargetHost(winner) -} - -// Step result builders for common pipeline steps -func ResourceWeigherStep(activations map[string]float64) v1alpha1.StepResult { - return v1alpha1.StepResult{ - StepName: "resource-weigher", - Activations: activations, - } -} - -func AvailabilityFilterStep(activations map[string]float64) v1alpha1.StepResult { - return v1alpha1.StepResult{ - StepName: "availability-filter", - Activations: activations, - } -} - -func PlacementPolicyStep(activations map[string]float64) v1alpha1.StepResult { - return v1alpha1.StepResult{ - StepName: "placement-policy", - Activations: activations, - } -} - -func WeigherStep(name string, activations map[string]float64) v1alpha1.StepResult { - return v1alpha1.StepResult{ - StepName: name, - Activations: activations, - } -} - -// Test execution helpers -func SetupExplainerTest(t *testing.T, decisions ...*v1alpha1.Decision) *Explainer { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - objects := make([]runtime.Object, len(decisions)) - for i, decision := range decisions { - objects[i] = decision - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(objects...). - Build() - - explainer, err := NewExplainer(client) - if err != nil { - t.Fatalf("Failed to create explainer: %v", err) - } - return explainer -} - -func RunExplanationTest(t *testing.T, decision *v1alpha1.Decision, expectedContains []string) { - explainer := SetupExplainerTest(t, decision) - explanation, err := explainer.Explain(context.Background(), decision) - AssertNoError(t, err) - AssertExplanationContains(t, explanation, expectedContains...) -} - -func RunExplanationTestWithHistory(t *testing.T, decision *v1alpha1.Decision, historyDecisions []*v1alpha1.Decision, expectedContains []string) { - allDecisions := make([]*v1alpha1.Decision, len(historyDecisions)+1) - copy(allDecisions, historyDecisions) - allDecisions[len(historyDecisions)] = decision - explainer := SetupExplainerTest(t, allDecisions...) - explanation, err := explainer.Explain(context.Background(), decision) - AssertNoError(t, err) - AssertExplanationContains(t, explanation, expectedContains...) -} - -func AssertNoError(t *testing.T, err error) { - if err != nil { - t.Errorf("Expected no error but got: %v", err) - } -} - -func AssertExplanationContains(t *testing.T, explanation string, expected ...string) { - for _, exp := range expected { - if !contains(explanation, exp) { - t.Errorf("Expected explanation to contain '%s', but got: %s", exp, explanation) - } - } -} - -func AssertExplanationNotContains(t *testing.T, explanation string, notExpected ...string) { - for _, notExp := range notExpected { - if contains(explanation, notExp) { - t.Errorf("Expected explanation to NOT contain '%s', but got: %s", notExp, explanation) - } - } -} - -func TestExplainer_WinnerAnalysis(t *testing.T) { - tests := []struct { - name string - decision *v1alpha1.Decision - expectedContains []string - }{ - { - name: "winner analysis with score gap", - decision: DecisionWithScoring("test-decision", "host-1", - map[string]float64{"host-1": 2.45, "host-2": 2.10, "host-3": 1.85}). - Build(), - expectedContains: []string{ - "Selected: host-1 (score: 2.45)", - "gap to 2nd: 0.35", - "3 hosts evaluated", - }, - }, - { - name: "winner analysis with single host", - decision: DecisionWithScoring("test-decision", "host-1", - map[string]float64{"host-1": 2.45}). - Build(), - expectedContains: []string{ - "Selected: host-1 (score: 2.45)", - "1 host evaluated", - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - RunExplanationTest(t, tt.decision, tt.expectedContains) - }) - } -} - -func TestExplainer_InputComparison(t *testing.T) { - tests := []struct { - name string - decision *v1alpha1.Decision - expectedContains []string - }{ - { - name: "input choice confirmed", - decision: DecisionWithInputComparison("test-decision", "host-1", - map[string]float64{"host-1": 1.20, "host-2": 1.10, "host-3": 0.95}, - map[string]float64{"host-1": 2.45, "host-2": 2.10, "host-3": 1.85}). - Build(), - expectedContains: []string{ - "Input choice confirmed: host-1 (1.20→2.45)", - }, - }, - { - name: "input choice overridden", - decision: DecisionWithInputComparison("test-decision", "host-2", - map[string]float64{"host-1": 1.50, "host-2": 1.20, "host-3": 0.95}, - map[string]float64{"host-1": 1.85, "host-2": 2.45, "host-3": 2.10}). - Build(), - expectedContains: []string{ - "Input favored host-1 (1.50), final winner: host-2 (1.20→2.45)", - }, - }, - { - name: "raw weights preferred over normalized", - decision: NewDecision("test-decision"). - WithTargetHost("host-1"). - WithRawInputWeights(map[string]float64{"host-1": 100.0, "host-2": 90.0}). - WithNormalizedInputWeights(map[string]float64{"host-1": 1.0, "host-2": 0.9}). - WithAggregatedOutputWeights(map[string]float64{"host-1": 2.45, "host-2": 2.10}). - Build(), - expectedContains: []string{ - "Input choice confirmed: host-1 (100.00→2.45)", // Should now use raw weights (100.00) - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - RunExplanationTest(t, tt.decision, tt.expectedContains) - }) - } -} - -func TestExplainer_CriticalStepsAnalysis(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - tests := []struct { - name string - decision *v1alpha1.Decision - expectedContains []string - }{ - { - name: "single critical step", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1.0, "host-2": 2.0}), - Step("resource-weigher", map[string]float64{"host-1": 1.5, "host-2": 0.2}), - Step("availability-filter", map[string]float64{"host-1": 0.0, "host-2": 0.0})), - expectedContains: []string{ - "Decision driven by 1/2 pipeline step: resource-weigher", - }, - }, - { - name: "multiple critical steps", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1.0, "host-2": 3.0}), - Step("resource-weigher", map[string]float64{"host-1": 1.0, "host-2": -0.5}), - Step("availability-filter", map[string]float64{"host-1": 1.0, "host-2": 0.0}), - Step("placement-policy", map[string]float64{"host-1": 0.05, "host-2": 0.05})), - expectedContains: []string{ - "Decision driven by 2/3 pipeline steps: resource-weigher, availability-filter", - }, - }, - { - name: "all steps non-critical", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 3.0, "host-2": 1.0}), - Step("step-1", map[string]float64{"host-1": 0.05, "host-2": 0.05}), - Step("step-2", map[string]float64{"host-1": 0.02, "host-2": 0.02})), - expectedContains: []string{ - "Decision driven by input only (all 2 steps are non-critical)", - }, - }, - { - name: "all steps critical", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1.0, "host-2": 3.0}), - Step("step-1", map[string]float64{"host-1": 1.0, "host-2": -0.5}), - Step("step-2", map[string]float64{"host-1": 1.0, "host-2": 0.0})), - expectedContains: []string{ - "Decision requires all 2 pipeline steps", - }, - }, - { - name: "three critical steps formatting", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1.0, "host-2": 4.0}), - Step("step-a", map[string]float64{"host-1": 1.0, "host-2": -0.5}), - Step("step-b", map[string]float64{"host-1": 1.0, "host-2": 0.0}), - Step("step-c", map[string]float64{"host-1": 1.0, "host-2": 0.0}), - Step("step-d", map[string]float64{"host-1": 0.05, "host-2": 0.05})), - expectedContains: []string{ - "Decision driven by 3/4 pipeline steps: step-a, step-b, step-c", - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(tt.decision). - Build() - - explainer, err := NewExplainer(client) - if err != nil { - t.Errorf("Failed to create explainer: %v", err) - return - } - - explanation, err := explainer.Explain(context.Background(), tt.decision) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - for _, expected := range tt.expectedContains { - if !contains(explanation, expected) { - t.Errorf("Expected explanation to contain '%s', but got: %s", expected, explanation) - } - } - }) - } -} - -func TestExplainer_CompleteExplanation(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - previousDecision := WithUID(WithTargetHost(NewTestDecision("test-decision-1"), "host-1"), "test-uid-1") - - decision := WithSteps( - WithOutputWeights( - WithInputWeights( - WithHistoryRef( - WithTargetHost(NewTestDecision("test-decision-2"), "host-2"), - previousDecision), - map[string]float64{"host-1": 1.50, "host-2": 1.20, "host-3": 0.95}), - map[string]float64{"host-1": 1.85, "host-2": 2.45, "host-3": 2.10}), - Step("resource-weigher", map[string]float64{"host-1": 0.15, "host-2": 0.85, "host-3": 0.75}), - Step("availability-filter", map[string]float64{"host-1": 0.20, "host-2": 0.40, "host-3": 0.40})) - - // Set precedence manually since it's not commonly used - decision.Status.Precedence = intPtr(1) - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(decision, previousDecision). - Build() - - explainer, err := NewExplainer(client) - if err != nil { - t.Errorf("Failed to create explainer: %v", err) - return - } - - explanation, err := explainer.Explain(context.Background(), decision) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - expectedParts := []string{ - "Decision #2 for this nova server", - "Previous target host was 'host-1', now it's 'host-2'", - "Selected: host-2 (score: 2.45), gap to 2nd: 0.35, 3 hosts evaluated", - "Input favored host-1 (1.50), final winner: host-2 (1.20→2.45)", - "Decision driven by 1/2 pipeline step: resource-weigher", - } - - for _, expected := range expectedParts { - if !contains(explanation, expected) { - t.Errorf("Expected explanation to contain '%s', but got: %s", expected, explanation) - } - } -} - -func TestExplainer_DeletedHostsAnalysis(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - tests := []struct { - name string - decision *v1alpha1.Decision - expectedContains []string - }{ - { - name: "single host filtered by single step", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1.0, "host-2": 2.0}), - Step("availability-filter", map[string]float64{"host-1": 0.5})), - expectedContains: []string{ - "1 host filtered:", - "- host-2 (input choice) by availability-filter", - }, - }, - { - name: "multiple hosts filtered", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 3.0, "host-2": 2.0, "host-3": 1.0}), - Step("availability-filter", map[string]float64{"host-1": 0.5})), - expectedContains: []string{ - "2 hosts filtered", - }, - }, - { - name: "multiple hosts filtered including input winner", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1.0, "host-2": 3.0, "host-3": 2.0}), - Step("availability-filter", map[string]float64{"host-1": 0.5})), - expectedContains: []string{ - "2 hosts filtered:", - "- host-2 (input choice) by availability-filter", - "- host-3 by availability-filter", - }, - }, - { - name: "no hosts filtered", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1.0, "host-2": 2.0}), - Step("resource-weigher", map[string]float64{"host-1": 0.5, "host-2": 0.3})), - expectedContains: []string{}, // No deleted hosts analysis should be present - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(tt.decision). - Build() - - explainer, err := NewExplainer(client) - if err != nil { - t.Errorf("Failed to create explainer: %v", err) - return - } - - explanation, err := explainer.Explain(context.Background(), tt.decision) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - for _, expected := range tt.expectedContains { - if !contains(explanation, expected) { - t.Errorf("Expected explanation to contain '%s', but got: %s", expected, explanation) - } - } - - // For the "no hosts filtered" case, ensure no deleted hosts analysis is present - if len(tt.expectedContains) == 0 { - deletedHostsKeywords := []string{"filtered", "Input winner", "hosts filtered"} - for _, keyword := range deletedHostsKeywords { - if contains(explanation, keyword) { - t.Errorf("Expected explanation to NOT contain '%s' for no deleted hosts case, but got: %s", keyword, explanation) - } - } - } - }) - } -} - -func TestExplainer_GlobalChainAnalysis(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - baseTime := metav1.Now() - time1 := metav1.Time{Time: baseTime.Add(-120 * time.Minute)} // 2 hours ago - time2 := metav1.Time{Time: baseTime.Add(-60 * time.Minute)} // 1 hour ago - time3 := metav1.Time{Time: baseTime.Time} // now - - tests := []struct { - name string - currentDecision *v1alpha1.Decision - historyDecisions []v1alpha1.Decision - expectedContains []string - expectedNotContain []string - }{ - { - name: "simple chain with durations", - currentDecision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-3", - Namespace: "default", - CreationTimestamp: time3, - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource", - }, - Status: v1alpha1.DecisionStatus{ - History: &[]corev1.ObjectReference{ - {Kind: "Decision", Namespace: "default", Name: "decision-1", UID: "uid-1"}, - {Kind: "Decision", Namespace: "default", Name: "decision-2", UID: "uid-2"}, - }, - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-3"), - }, - }, - }, - historyDecisions: []v1alpha1.Decision{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-1", - Namespace: "default", - UID: "uid-1", - CreationTimestamp: time1, - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-2", - Namespace: "default", - UID: "uid-2", - CreationTimestamp: time2, - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-2"), - }, - }, - }, - }, - expectedContains: []string{ - "Chain: host-1 (1h0m0s) -> host-2 (1h0m0s) -> host-3 (0s).", - }, - }, - { - name: "chain with loop detection", - currentDecision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-3", - Namespace: "default", - CreationTimestamp: time3, - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource", - }, - Status: v1alpha1.DecisionStatus{ - History: &[]corev1.ObjectReference{ - {Kind: "Decision", Namespace: "default", Name: "decision-1", UID: "uid-1"}, - {Kind: "Decision", Namespace: "default", Name: "decision-2", UID: "uid-2"}, - }, - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), // Back to host-1 - creates loop - }, - }, - }, - historyDecisions: []v1alpha1.Decision{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-1", - Namespace: "default", - UID: "uid-1", - CreationTimestamp: time1, - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-2", - Namespace: "default", - UID: "uid-2", - CreationTimestamp: time2, - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-2"), - }, - }, - }, - }, - expectedContains: []string{ - "Chain (loop detected): host-1 (1h0m0s) -> host-2 (1h0m0s) -> host-1 (0s).", - }, - }, - { - name: "chain with multiple decisions on same host", - currentDecision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-4", - Namespace: "default", - CreationTimestamp: time3, - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource", - }, - Status: v1alpha1.DecisionStatus{ - History: &[]corev1.ObjectReference{ - {Kind: "Decision", Namespace: "default", Name: "decision-1", UID: "uid-1"}, - {Kind: "Decision", Namespace: "default", Name: "decision-2", UID: "uid-2"}, - {Kind: "Decision", Namespace: "default", Name: "decision-3", UID: "uid-3"}, - }, - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-2"), - }, - }, - }, - historyDecisions: []v1alpha1.Decision{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-1", - Namespace: "default", - UID: "uid-1", - CreationTimestamp: time1, - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-2", - Namespace: "default", - UID: "uid-2", - CreationTimestamp: time1, // Same time as decision-1 - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), // Same host as decision-1 - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-3", - Namespace: "default", - UID: "uid-3", - CreationTimestamp: time2, - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), // Still same host - }, - }, - }, - }, - expectedContains: []string{ - "Chain: host-1 (2h0m0s; 3 decisions) -> host-2 (0s).", - }, - }, - { - name: "chain with multi-day duration", - currentDecision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-2", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: baseTime.Time}, - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource", - }, - Status: v1alpha1.DecisionStatus{ - History: &[]corev1.ObjectReference{ - {Kind: "Decision", Namespace: "default", Name: "decision-1", UID: "uid-1"}, - }, - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-2"), - }, - }, - }, - historyDecisions: []v1alpha1.Decision{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-1", - Namespace: "default", - UID: "uid-1", - CreationTimestamp: metav1.Time{Time: baseTime.Add(-72 * time.Hour)}, // 3 days ago - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), - }, - }, - }, - }, - expectedContains: []string{ - "Chain: host-1 (3d0h0m0s) -> host-2 (0s).", - }, - }, - { - name: "no chain for initial decision", - currentDecision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-1", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource", - }, - Status: v1alpha1.DecisionStatus{ - History: nil, // No history - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), - }, - }, - }, - historyDecisions: []v1alpha1.Decision{}, - expectedNotContain: []string{ - "Chain:", - "chain:", - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - objects := []runtime.Object{tt.currentDecision} - for i := range tt.historyDecisions { - objects = append(objects, &tt.historyDecisions[i]) - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(objects...). - Build() - - explainer, err := NewExplainer(client) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - explanation, err := explainer.Explain(context.Background(), tt.currentDecision) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - for _, expected := range tt.expectedContains { - if !contains(explanation, expected) { - t.Errorf("Expected explanation to contain '%s', but got: %s", expected, explanation) - } - } - - for _, notExpected := range tt.expectedNotContain { - if contains(explanation, notExpected) { - t.Errorf("Expected explanation to NOT contain '%s', but got: %s", notExpected, explanation) - } - } - }) - } -} - -func intPtr(i int) *int { - return &i -} - -func TestExplainer_RawWeightsPriorityBugFix(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - tests := []struct { - name string - decision *v1alpha1.Decision - expectedContains []string - description string - }{ - { - name: "raw_weights_preserve_small_differences", - decision: func() *v1alpha1.Decision { - decision := WithOutputWeights( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-2"), - map[string]float64{"host-1": 1000.05, "host-2": 1000.10, "host-3": 1000.00}), - map[string]float64{"host-1": 1001.05, "host-2": 1002.10, "host-3": 1001.00}) - // Add normalized weights to show they would mask the difference - decision.Status.Result.NormalizedInWeights = map[string]float64{"host-1": 1.0, "host-2": 1.0, "host-3": 1.0} - return decision - }(), - expectedContains: []string{ - "Input choice confirmed: host-2 (1000.10→1002.10)", // Should use raw weights (1000.10) - }, - description: "Raw weights preserve small differences that normalized weights would mask", - }, - { - name: "raw_weights_detect_correct_input_winner", - decision: func() *v1alpha1.Decision { - decision := WithOutputWeights( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-3"), - map[string]float64{"host-1": 2000.15, "host-2": 2000.10, "host-3": 2000.05}), - map[string]float64{"host-1": 2001.15, "host-2": 2001.10, "host-3": 2002.05}) - // Add normalized weights to show they would mask the difference - decision.Status.Result.NormalizedInWeights = map[string]float64{"host-1": 1.0, "host-2": 1.0, "host-3": 1.0} - return decision - }(), - expectedContains: []string{ - "Input favored host-1 (2000.15), final winner: host-3 (2000.05→2002.05)", // Should detect host-1 as input winner using raw weights - }, - description: "Raw weights correctly identify input winner that normalized weights would miss", - }, - { - name: "critical_steps_analysis_uses_raw_weights", - decision: func() *v1alpha1.Decision { - decision := WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1000.05, "host-2": 1000.00}), - Step("resource-weigher", map[string]float64{"host-1": 0.5, "host-2": 0.0})) - // Add normalized weights to show they would mask the difference - decision.Status.Result.NormalizedInWeights = map[string]float64{"host-1": 1.0, "host-2": 1.0} - return decision - }(), - expectedContains: []string{ - "Decision driven by input only (all 1 step is non-critical)", // With small raw weight advantage, step is non-critical - "Input choice confirmed: host-1 (1000.05→0.00)", // Shows raw weights are being used - }, - description: "Critical steps analysis uses raw weights - with small raw advantage, step becomes non-critical", - }, - { - name: "deleted_hosts_analysis_uses_raw_weights", - decision: func() *v1alpha1.Decision { - decision := WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1000.00, "host-2": 1000.05, "host-3": 999.95}), - Step("availability-filter", map[string]float64{"host-1": 0.0})) - // Add normalized weights to show they would mask the difference - decision.Status.Result.NormalizedInWeights = map[string]float64{"host-1": 1.0, "host-2": 1.0, "host-3": 1.0} - return decision - }(), - expectedContains: []string{ - "2 hosts filtered:", - "- host-2 (input choice) by availability-filter", - "Input favored host-2 (1000.05), final winner: host-1 (1000.00→0.00)", - }, - description: "Deleted hosts analysis uses raw weights to correctly identify input winner", - }, - { - name: "fallback_to_normalized_when_no_raw_weights", - decision: func() *v1alpha1.Decision { - decision := WithOutputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 2.5, "host-2": 2.0, "host-3": 1.8}) - // Set normalized weights and clear raw weights to test fallback - decision.Status.Result.NormalizedInWeights = map[string]float64{"host-1": 1.5, "host-2": 1.0, "host-3": 0.8} - decision.Status.Result.RawInWeights = nil - return decision - }(), - expectedContains: []string{ - "Input choice confirmed: host-1 (1.50→2.50)", // Should use normalized weights as fallback - }, - description: "Should fall back to normalized weights when raw weights are not available", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(tt.decision). - Build() - - explainer, err := NewExplainer(client) - if err != nil { - t.Fatalf("Failed to create explainer: %v", err) - } - - explanation, err := explainer.Explain(context.Background(), tt.decision) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - for _, expected := range tt.expectedContains { - if !contains(explanation, expected) { - t.Errorf("Expected explanation to contain '%s', but got: %s", expected, explanation) - } - } - }) - } -} - -// TestExplainer_RawVsNormalizedComparison demonstrates the impact of the bug fix -func TestExplainer_RawVsNormalizedComparison(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - decision := &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource", - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-2"), - RawInWeights: map[string]float64{ - "host-1": 1000.05, // Very small difference - "host-2": 1000.10, // Slightly higher - should be detected as input winner - "host-3": 1000.00, - }, - NormalizedInWeights: map[string]float64{ - "host-1": 1.0, // All normalized to same value - would mask the difference - "host-2": 1.0, - "host-3": 1.0, - }, - AggregatedOutWeights: map[string]float64{ - "host-1": 1001.05, - "host-2": 1002.10, // host-2 wins - "host-3": 1001.00, - }, - }, - }, - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(decision). - Build() - - explainer, err := NewExplainer(client) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - explanation, err := explainer.Explain(context.Background(), decision) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - if !contains(explanation, "Input choice confirmed: host-2 (1000.10→1002.10)") { - t.Errorf("Expected explanation to show raw weight value (1000.10), but got: %s", explanation) - } - - if contains(explanation, "Input favored host-1") || contains(explanation, "Input favored host-3") { - t.Errorf("Expected explanation to NOT show input choice override, but got: %s", explanation) - } -} - -func TestExplainer_StepImpactAnalysis(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - tests := []struct { - name string - decision *v1alpha1.Decision - expectedContains []string - }{ - { - name: "step with positive impact", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1.0, "host-2": 2.0}), - Step("resource-weigher", map[string]float64{"host-1": 1.5, "host-2": 0.2})), - expectedContains: []string{ - "Step impacts:", - "resource-weigher +1.50", - }, - }, - { - name: "step with promotion to first", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1.0, "host-2": 2.0}), - Step("resource-weigher", map[string]float64{"host-1": 2.0, "host-2": 0.5})), - expectedContains: []string{ - "Step impacts:", - "resource-weigher +2.00→#1", - }, - }, - { - name: "step with competitor removal", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 2.0, "host-2": 1.0, "host-3": 0.5}), - Step("availability-filter", map[string]float64{"host-1": 0.0})), - expectedContains: []string{ - "Step impacts:", - "availability-filter +0.00 (removed 2)", - }, - }, - { - name: "multiple steps sorted by impact", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1.0, "host-2": 2.0}), - Step("resource-weigher", map[string]float64{"host-1": 1.5, "host-2": 0.2}), - Step("availability-filter", map[string]float64{"host-1": 0.1, "host-2": 0.0})), - expectedContains: []string{ - "Step impacts:", - "resource-weigher +1.50", - "availability-filter +0.10", - }, - }, - { - name: "no step impacts for decision without steps", - decision: WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 2.0, "host-2": 1.0}), - expectedContains: []string{}, // No step impacts should be present - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(tt.decision). - Build() - - explainer, err := NewExplainer(client) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - explanation, err := explainer.Explain(context.Background(), tt.decision) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - for _, expected := range tt.expectedContains { - if !contains(explanation, expected) { - t.Errorf("Expected explanation to contain '%s', but got: %s", expected, explanation) - } - } - - // For the "no step impacts" case, ensure no step impacts analysis is present - if len(tt.expectedContains) == 0 { - stepImpactsKeywords := []string{"Step impacts:", "→#1", "removed"} - for _, keyword := range stepImpactsKeywords { - if contains(explanation, keyword) { - t.Errorf("Expected explanation to NOT contain '%s' for no step impacts case, but got: %s", keyword, explanation) - } - } - } - }) - } -} diff --git a/internal/scheduling/explanation/templates.go b/internal/scheduling/explanation/templates.go deleted file mode 100644 index dc7160c07..000000000 --- a/internal/scheduling/explanation/templates.go +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package explanation - -import ( - "bytes" - "fmt" - "strings" - "text/template" - "time" -) - -type TemplateManager struct { - templates *template.Template -} - -func NewTemplateManager() (*TemplateManager, error) { - tmpl := template.New("explanation").Funcs(template.FuncMap{ - "join": strings.Join, - "formatDuration": formatTemplateDuration, - "formatFloat": func(f float64) string { return fmt.Sprintf("%.2f", f) }, - "formatDelta": func(f float64) string { return fmt.Sprintf("%+.2f", f) }, - "add": func(a, b int) int { return a + b }, - "plural": func(n int, singular, plural string) string { - if n == 1 { - return singular - } - return plural - }, - }) - - tmpl, err := tmpl.Parse(mainTemplate) - if err != nil { - return nil, fmt.Errorf("failed to parse main template: %w", err) - } - - templates := map[string]string{ - "context": contextTemplate, - "history": historyTemplate, - "winner": winnerTemplate, - "input": inputTemplate, - "critical": criticalTemplate, - "deleted": deletedTemplate, - "impacts": impactsTemplate, - "chain": chainTemplate, - } - - for name, templateStr := range templates { - tmpl, err = tmpl.Parse(fmt.Sprintf(`{{define "%s"}}%s{{end}}`, name, templateStr)) - if err != nil { - return nil, fmt.Errorf("failed to parse %s template: %w", name, err) - } - } - - return &TemplateManager{templates: tmpl}, nil -} - -func (tm *TemplateManager) RenderExplanation(ctx ExplanationContext) (string, error) { - var buf bytes.Buffer - err := tm.templates.Execute(&buf, ctx) - if err != nil { - return "", fmt.Errorf("failed to render explanation: %w", err) - } - return strings.TrimSpace(buf.String()), nil -} - -func formatTemplateDuration(d time.Duration) string { - if d == 0 { - return "0s" - } - - // Truncate to seconds to remove sub-second precision - d = d.Truncate(time.Second) - - // For durations >= 24 hours, convert to days format - if d >= 24*time.Hour { - days := int(d.Hours()) / 24 - remainder := d - time.Duration(days)*24*time.Hour - if remainder == 0 { - return fmt.Sprintf("%dd0h0m0s", days) - } - return fmt.Sprintf("%d%s", days, remainder.String()) - } - - // For shorter durations, use Go's built-in formatting - return d.String() -} - -const mainTemplate = `{{template "context" .Context}} -{{- if .History}} {{template "history" .History}}{{end}} -{{- if .Winner}} {{template "winner" .Winner}}{{end}} -{{- if .Input}} {{template "input" .Input}}{{end}} -{{- if .CriticalSteps}} {{template "critical" .CriticalSteps}}{{end}} -{{- if .DeletedHosts}} {{template "deleted" .DeletedHosts}}{{end}} -{{- if .StepImpacts}} {{template "impacts" .StepImpacts}}{{end}} -{{- if .Chain}} {{template "chain" .Chain}}{{end}}` - -const contextTemplate = `{{if .IsInitial -}} -Initial placement of the {{.ResourceType}}. -{{- else -}} -Decision #{{.DecisionNumber}} for this {{.ResourceType}}. -{{- end}}` - -const historyTemplate = `Previous target host was '{{.PreviousTarget}}', now it's '{{.CurrentTarget}}'.` - -const winnerTemplate = `Selected: {{.HostName}} (score: {{formatFloat .Score}}) -{{- if .HasGap}}, gap to 2nd: {{formatFloat .Gap}}{{end}}, {{.HostsEvaluated}} {{plural .HostsEvaluated "host" "hosts"}} evaluated.` - -const inputTemplate = `{{if .InputConfirmed -}} -Input choice confirmed: {{.FinalWinner}} ({{formatFloat .InputScore}}→{{formatFloat .FinalScore}}). -{{- else -}} -Input favored {{.InputWinner}} ({{formatFloat .InputScore}}), final winner: {{.FinalWinner}} ({{formatFloat .FinalInputScore}}→{{formatFloat .FinalScore}}). -{{- end}}` - -const criticalTemplate = `{{if .IsInputOnly -}} -Decision driven by input only (all {{.TotalSteps}} {{plural .TotalSteps "step is" "steps are"}} non-critical). -{{- else if .RequiresAll -}} -Decision requires all {{.TotalSteps}} pipeline {{plural .TotalSteps "step" "steps"}}. -{{- else if eq (len .Steps) 1 -}} -Decision driven by 1/{{.TotalSteps}} pipeline step: {{index .Steps 0}}. -{{- else -}} -Decision driven by {{len .Steps}}/{{.TotalSteps}} pipeline {{plural .TotalSteps "step" "steps"}}: {{join .Steps ", "}}. -{{- end}}` - -const deletedTemplate = `{{len .DeletedHosts}} {{plural (len .DeletedHosts) "host" "hosts"}} filtered: -{{- range .DeletedHosts}} - - {{.Name}}{{if .IsInputWinner}} (input choice){{end}} by {{join .Steps ", "}} -{{- end}}` - -const impactsTemplate = ` Step impacts: -{{- range $i, $impact := .}} -• {{$impact.Step}} -{{- if $impact.PromotedToFirst}} {{formatDelta $impact.ScoreDelta}}→#1 -{{- else if ne $impact.ScoreDelta 0.0}} {{formatDelta $impact.ScoreDelta}} -{{- else if gt $impact.CompetitorsRemoved 0}} +0.00 (removed {{$impact.CompetitorsRemoved}}) -{{- else}} +0.00{{end}} -{{- end}}` - -const chainTemplate = `{{if .HasLoop}}Chain (loop detected): {{else}}Chain: {{end}} -{{- range $i, $segment := .Segments}}{{if gt $i 0}} -> {{end}}{{$segment.Host}} ({{formatDuration $segment.Duration}}{{if gt $segment.Decisions 1}}; {{$segment.Decisions}} decisions{{end}}){{end}}.` diff --git a/internal/scheduling/explanation/types.go b/internal/scheduling/explanation/types.go deleted file mode 100644 index 31f6d0aa1..000000000 --- a/internal/scheduling/explanation/types.go +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package explanation - -import "time" - -// ExplanationContext holds all data needed to render a complete explanation. -type ExplanationContext struct { - Context ContextData `json:"context"` - History *HistoryData `json:"history,omitempty"` - Winner *WinnerData `json:"winner,omitempty"` - Input *InputData `json:"input,omitempty"` - CriticalSteps *CriticalStepsData `json:"criticalSteps,omitempty"` - DeletedHosts *DeletedHostsData `json:"deletedHosts,omitempty"` - StepImpacts []StepImpact `json:"stepImpacts,omitempty"` - Chain *ChainData `json:"chain,omitempty"` -} - -type ContextData struct { - ResourceType string `json:"resourceType"` - DecisionNumber int `json:"decisionNumber"` - IsInitial bool `json:"isInitial"` -} - -// HistoryData contains information about the previous decision in the chain. -type HistoryData struct { - PreviousTarget string `json:"previousTarget"` - CurrentTarget string `json:"currentTarget"` -} - -type WinnerData struct { - HostName string `json:"hostName"` - Score float64 `json:"score"` - Gap float64 `json:"gap"` - HostsEvaluated int `json:"hostsEvaluated"` - HasGap bool `json:"hasGap"` -} - -// InputData contains information about input vs final winner comparison. -type InputData struct { - InputWinner string `json:"inputWinner"` - InputScore float64 `json:"inputScore"` - FinalWinner string `json:"finalWinner"` - FinalScore float64 `json:"finalScore"` - FinalInputScore float64 `json:"finalInputScore"` // Final winner's input score - InputConfirmed bool `json:"inputConfirmed"` -} - -// CriticalStepsData contains information about which pipeline steps were critical. -type CriticalStepsData struct { - Steps []string `json:"steps"` - TotalSteps int `json:"totalSteps"` - IsInputOnly bool `json:"isInputOnly"` - RequiresAll bool `json:"requiresAll"` -} - -// DeletedHostsData contains information about hosts that were filtered out. -type DeletedHostsData struct { - DeletedHosts []DeletedHostInfo `json:"deletedHosts"` -} - -// DeletedHostInfo contains details about a single deleted host. -type DeletedHostInfo struct { - Name string `json:"name"` - Steps []string `json:"steps"` - IsInputWinner bool `json:"isInputWinner"` -} - -// ChainData contains information about the decision chain over time. -type ChainData struct { - Segments []ChainSegment `json:"segments"` - HasLoop bool `json:"hasLoop"` -} - -// ChainSegment represents a period where the resource was on a specific host. -type ChainSegment struct { - Host string `json:"host"` - Duration time.Duration `json:"duration"` - // number of decisions with this as the target host - Decisions int `json:"decisions"` -} diff --git a/internal/scheduling/lib/filter_weigher_pipeline.go b/internal/scheduling/lib/filter_weigher_pipeline.go index f362c07af..655f7382b 100644 --- a/internal/scheduling/lib/filter_weigher_pipeline.go +++ b/internal/scheduling/lib/filter_weigher_pipeline.go @@ -138,7 +138,7 @@ func InitNewFilterWeigherPipeline[RequestType FilterWeigherPipelineRequest]( func (p *filterWeigherPipeline[RequestType]) runFilters( log *slog.Logger, request RequestType, -) (filteredRequest RequestType) { +) (filteredRequest RequestType, stepResults []v1alpha1.StepResult) { filteredRequest = request for _, filterName := range p.filtersOrder { @@ -155,11 +155,15 @@ func (p *filterWeigherPipeline[RequestType]) runFilters( continue } stepLog.Info("scheduler: finished filter") + stepResults = append(stepResults, v1alpha1.StepResult{ + StepName: filterName, + Activations: result.Activations, + }) // Mutate the request to only include the remaining hosts. // Assume the resulting request type is the same as the input type. filteredRequest = filteredRequest.FilterHosts(result.Activations).(RequestType) } - return filteredRequest + return filteredRequest, stepResults } // Execute weighers and collect their activations by step name. @@ -275,7 +279,7 @@ func (p *filterWeigherPipeline[RequestType]) Run(request RequestType) (v1alpha1. // Run filters first to reduce the number of hosts. // Any weights assigned to filtered out hosts are ignored. - filteredRequest := p.runFilters(traceLog, request) + filteredRequest, filterStepResults := p.runFilters(traceLog, request) traceLog.Info( "scheduler: finished filters", "remainingHosts", filteredRequest.GetHosts(), @@ -296,9 +300,23 @@ func (p *filterWeigherPipeline[RequestType]) Run(request RequestType) (v1alpha1. // Collect some metrics about the pipeline execution. go p.monitor.observePipelineResult(request, hosts) + // Build step results from filters and weighers. + stepResults := filterStepResults + for _, weigherName := range p.weighersOrder { + activations, ok := stepWeights[weigherName] + if !ok { + continue + } + stepResults = append(stepResults, v1alpha1.StepResult{ + StepName: weigherName, + Activations: activations, + }) + } + result := v1alpha1.DecisionResult{ RawInWeights: request.GetWeights(), NormalizedInWeights: inWeights, + StepResults: stepResults, AggregatedOutWeights: outWeights, OrderedHosts: hosts, } diff --git a/internal/scheduling/lib/filter_weigher_pipeline_test.go b/internal/scheduling/lib/filter_weigher_pipeline_test.go index 5d025eb2b..0e2775944 100644 --- a/internal/scheduling/lib/filter_weigher_pipeline_test.go +++ b/internal/scheduling/lib/filter_weigher_pipeline_test.go @@ -221,7 +221,7 @@ func TestPipeline_RunFilters(t *testing.T) { Weights: map[string]float64{"host1": 0.0, "host2": 0.0, "host3": 0.0}, } - req := p.runFilters(slog.Default(), request) + req, _ := p.runFilters(slog.Default(), request) if len(req.Hosts) != 2 { t.Fatalf("expected 2 step results, got %d", len(req.Hosts)) } diff --git a/internal/scheduling/lib/history_client.go b/internal/scheduling/lib/history_client.go new file mode 100644 index 000000000..156b8521c --- /dev/null +++ b/internal/scheduling/lib/history_client.go @@ -0,0 +1,304 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import ( + "context" + "errors" + "fmt" + "sort" + "strings" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/tools/events" + "k8s.io/client-go/util/retry" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const ( + maxHostsInExplanation = 10 + maxHistoryEntries = 10 + maxHostsInOrderedList = 3 +) + +// joinHostsCapped joins up to max host names. If hosts exceeds max, it appends +// a count of the omitted entries, e.g. "host-a, host-b (and 48 more)". +func joinHostsCapped(hosts []string, maxHosts int) string { + if len(hosts) <= maxHosts { + return strings.Join(hosts, ", ") + } + return fmt.Sprintf("%s (and %d more)", strings.Join(hosts[:maxHosts], ", "), len(hosts)-maxHosts) +} + +func getName(schedulingDomain v1alpha1.SchedulingDomain, resourceID string) string { + return fmt.Sprintf("%s-%s", schedulingDomain, resourceID) +} + +// generateSimplifiedExplanation produces a human-readable explanation from a +// decision result. On failure it includes the error. On success it describes +// which pipeline steps filtered out which hosts. +func generateExplanation(result *v1alpha1.DecisionResult, pipelineErr error) string { + if pipelineErr != nil { + return fmt.Sprintf("Pipeline run failed: %s.", pipelineErr.Error()) + } + + if result == nil || len(result.StepResults) == 0 { + if result != nil && result.TargetHost != nil { + return fmt.Sprintf("Selected host: %s.", *result.TargetHost) + } + return "" + } + + // Get all initial hosts from input weights. + var allHosts map[string]float64 + if len(result.RawInWeights) > 0 { + allHosts = result.RawInWeights + } else if len(result.NormalizedInWeights) > 0 { + allHosts = result.NormalizedInWeights + } + if allHosts == nil { + if result.TargetHost != nil { + return fmt.Sprintf("Selected host: %s.", *result.TargetHost) + } + return "" + } + + var sb strings.Builder + fmt.Fprintf(&sb, "Started with %d host(s).\n\n", len(allHosts)) + + // Track current set of surviving hosts. + currentHosts := make(map[string]bool, len(allHosts)) + for h := range allHosts { + currentHosts[h] = true + } + + for _, step := range result.StepResults { + // Determine which hosts were removed by this step. + var removed []string + for h := range currentHosts { + if _, exists := step.Activations[h]; !exists { + removed = append(removed, h) + } + } + if len(removed) > 0 { + sort.Strings(removed) + for _, h := range removed { + delete(currentHosts, h) + } + fmt.Fprintf(&sb, "%s filtered out %s\n", + step.StepName, + joinHostsCapped(removed, maxHostsInExplanation), + ) + } + } + + // Summary of remaining hosts. + remaining := make([]string, 0, len(currentHosts)) + for h := range currentHosts { + remaining = append(remaining, h) + } + sort.Strings(remaining) + fmt.Fprintf(&sb, "\n%d hosts remaining (%s)\n", + len(remaining), + joinHostsCapped(remaining, maxHostsInExplanation), + ) + + if result.TargetHost != nil { + fmt.Fprintf(&sb, "\nSelected host: %s.", *result.TargetHost) + } + + return strings.TrimSpace(sb.String()) +} + +// HistoryClient manages History CRDs for scheduling decisions. It holds the +// Kubernetes client and event recorder so callers don't have to pass them on +// every invocation. +type HistoryClient struct { + Client client.Client + Recorder events.EventRecorder +} + +// CreateOrUpdateHistory creates or updates a History CRD for the given decision. +// It is called after every pipeline run (success and failure). The pipelineErr +// parameter is used to generate a meaningful explanation when the pipeline fails. +// If a non-nil Recorder is set, a Kubernetes Event is emitted on the History +// object to short-term persist the scheduling decision. +func (h *HistoryClient) CreateOrUpdateHistory( + ctx context.Context, + decision *v1alpha1.Decision, + az *string, + pipelineErr error, +) error { + + if decision == nil { + return errors.New("decision cannot be nil") + } + + log := ctrl.LoggerFrom(ctx) + + name := getName(decision.Spec.SchedulingDomain, decision.Spec.ResourceID) + + history := &v1alpha1.History{} + err := h.Client.Get(ctx, client.ObjectKey{Name: name}, history) + + if apierrors.IsNotFound(err) { + // Create new History CRD. + history = &v1alpha1.History{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + Spec: v1alpha1.HistorySpec{ + SchedulingDomain: decision.Spec.SchedulingDomain, + ResourceID: decision.Spec.ResourceID, + AvailabilityZone: az, + }, + } + if createErr := h.Client.Create(ctx, history); createErr != nil { + if apierrors.IsAlreadyExists(createErr) { + // Another controller beat us to it, re-fetch. + if getErr := h.Client.Get(ctx, client.ObjectKey{Name: name}, history); getErr != nil { + return getErr + } + } else { + log.Error(createErr, "failed to create history CRD", "name", name) + return createErr + } + } + } else if err != nil { + log.Error(err, "failed to get history CRD", "name", name) + return err + } + + successful := pipelineErr == nil && decision.Status.Result != nil && decision.Status.Result.TargetHost != nil + + namespacedName := client.ObjectKey{Name: name} + + // Use Update instead of MergeFrom+Patch because JSON merge patch strips + // boolean false values, which causes CRD validation to reject the patch + // when Successful is false. Retry on conflict to handle concurrent updates. + firstAttempt := true + if retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error { + // On retries, re-fetch the latest History object after a conflict. + if !firstAttempt { + if getErr := h.Client.Get(ctx, namespacedName, history); getErr != nil { + return getErr + } + } + firstAttempt = false + + // Archive the previous current decision into the history list. + if !history.Status.Current.Timestamp.IsZero() { + orderedHosts := history.Status.Current.OrderedHosts + if orderedHosts == nil { + orderedHosts = []string{} + } + entry := v1alpha1.SchedulingHistoryEntry{ + Timestamp: history.Status.Current.Timestamp, + PipelineRef: history.Status.Current.PipelineRef, + Intent: history.Status.Current.Intent, + OrderedHosts: orderedHosts, + Successful: history.Status.Current.Successful, + } + history.Status.History = append(history.Status.History, entry) + if len(history.Status.History) > maxHistoryEntries { + history.Status.History = history.Status.History[len(history.Status.History)-maxHistoryEntries:] + } + } + + // Build the new current decision. + current := v1alpha1.CurrentDecision{ + Timestamp: metav1.Now(), + PipelineRef: decision.Spec.PipelineRef, + Intent: decision.Spec.Intent, + Successful: successful, + Explanation: generateExplanation(decision.Status.Result, pipelineErr), + } + + current.OrderedHosts = []string{} + if decision.Status.Result != nil { + current.TargetHost = decision.Status.Result.TargetHost + hosts := decision.Status.Result.OrderedHosts + if len(hosts) > maxHostsInOrderedList { + hosts = hosts[:maxHostsInOrderedList] + } + current.OrderedHosts = hosts + } + history.Status.Current = current + + // Set Ready condition — True only when a host was successfully selected. + condStatus := metav1.ConditionTrue + reason := v1alpha1.HistoryReasonSchedulingSucceeded + message := "scheduling decision selected a target host" + if pipelineErr != nil { + condStatus = metav1.ConditionFalse + reason = v1alpha1.HistoryReasonPipelineRunFailed + message = "pipeline run failed: " + pipelineErr.Error() + } else if !successful { + condStatus = metav1.ConditionFalse + reason = v1alpha1.HistoryReasonNoHostFound + message = "pipeline completed but no suitable host was found" + } + meta.SetStatusCondition(&history.Status.Conditions, metav1.Condition{ + Type: v1alpha1.HistoryConditionReady, + Status: condStatus, + Reason: reason, + Message: message, + }) + + return h.Client.Status().Update(ctx, history) + }); retryErr != nil { + log.Error(retryErr, "failed to update history CRD status", "name", name) + return retryErr + } + + // Emit a Kubernetes Event on the History object to short-term persist the + // scheduling decision. Events auto-expire (default TTL ~1h) so this gives + // devops short-term visibility into individual scheduling runs. + if h.Recorder != nil { + eventType := corev1.EventTypeNormal + eventReason := v1alpha1.HistoryReasonSchedulingSucceeded + action := "Scheduled" + if !successful { + eventType = corev1.EventTypeWarning + eventReason = "SchedulingFailed" + action = "FailedScheduling" + } + h.Recorder.Eventf(history, nil, eventType, eventReason, action, "%s", history.Status.Current.Explanation) + } + + log.Info("history CRD updated", "name", name, "entries", len(history.Status.History)) + return nil +} + +// Delete deletes the History CRD associated with the given scheduling domain +// and resource ID. It is a no-op if the History CRD does not exist. +func (h *HistoryClient) Delete( + ctx context.Context, + schedulingDomain v1alpha1.SchedulingDomain, + resourceID string, +) error { + + log := ctrl.LoggerFrom(ctx) + name := getName(schedulingDomain, resourceID) + + history := &v1alpha1.History{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + } + if err := h.Client.Delete(ctx, history); err != nil { + if apierrors.IsNotFound(err) { + return nil + } + log.Error(err, "failed to delete history CRD", "name", name) + return err + } + log.Info("deleted history CRD", "name", name) + return nil +} diff --git a/internal/scheduling/lib/history_client_test.go b/internal/scheduling/lib/history_client_test.go new file mode 100644 index 000000000..d9e919cb7 --- /dev/null +++ b/internal/scheduling/lib/history_client_test.go @@ -0,0 +1,679 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import ( + "context" + "errors" + "fmt" + "strings" + "sync" + "testing" + "time" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + testlib "github.com/cobaltcore-dev/cortex/pkg/testing" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func TestGetName(t *testing.T) { + tests := []struct { + domain v1alpha1.SchedulingDomain + resourceID string + expected string + }{ + {v1alpha1.SchedulingDomainNova, "uuid-1", "nova-uuid-1"}, + {v1alpha1.SchedulingDomainCinder, "vol-abc", "cinder-vol-abc"}, + {v1alpha1.SchedulingDomainManila, "share-xyz", "manila-share-xyz"}, + {v1alpha1.SchedulingDomainMachines, "machine-1", "machines-machine-1"}, + {v1alpha1.SchedulingDomainPods, "pod-ns-name", "pods-pod-ns-name"}, + } + for _, tt := range tests { + t.Run(tt.expected, func(t *testing.T) { + got := getName(tt.domain, tt.resourceID) + if got != tt.expected { + t.Errorf("getName(%q, %q) = %q, want %q", tt.domain, tt.resourceID, got, tt.expected) + } + }) + } +} + +func TestGenerateExplanation(t *testing.T) { + tests := []struct { + name string + result *v1alpha1.DecisionResult + err error + expected string + }{ + { + name: "nil result no error", + result: nil, + err: nil, + expected: "", + }, + { + name: "nil result with error", + result: nil, + err: errors.New("something broke"), + expected: "Pipeline run failed: something broke.", + }, + { + name: "result with target host only no steps", + result: &v1alpha1.DecisionResult{ + TargetHost: testlib.Ptr("host-1"), + }, + expected: "Selected host: host-1.", + }, + { + name: "result with RawInWeights and filtering steps", + result: &v1alpha1.DecisionResult{ + RawInWeights: map[string]float64{ + "host-a": 1.0, + "host-b": 0.5, + "host-c": 0.3, + }, + StepResults: []v1alpha1.StepResult{ + { + StepName: "filter_capacity", + Activations: map[string]float64{ + "host-a": 1.0, + "host-b": 0.5, + }, + }, + { + StepName: "filter_status", + Activations: map[string]float64{ + "host-a": 1.0, + }, + }, + }, + TargetHost: testlib.Ptr("host-a"), + }, + expected: "Started with 3 host(s).\n\n" + + "filter_capacity filtered out host-c\n" + + "filter_status filtered out host-b\n\n" + + "1 hosts remaining (host-a)\n\n" + + "Selected host: host-a.", + }, + { + name: "uses NormalizedInWeights when RawInWeights empty", + result: &v1alpha1.DecisionResult{ + NormalizedInWeights: map[string]float64{ + "host-x": 0.6, + "host-y": 0.4, + }, + StepResults: []v1alpha1.StepResult{ + { + StepName: "weigher_cpu", + Activations: map[string]float64{ + "host-x": 0.8, + "host-y": 0.2, + }, + }, + }, + TargetHost: testlib.Ptr("host-x"), + }, + expected: "Started with 2 host(s).\n\n\n2 hosts remaining (host-x, host-y)\n\nSelected host: host-x.", + }, + { + name: "no weights no target", + result: &v1alpha1.DecisionResult{ + StepResults: []v1alpha1.StepResult{ + {StepName: "some-step", Activations: map[string]float64{}}, + }, + }, + expected: "", + }, + { + name: "no weights with target", + result: &v1alpha1.DecisionResult{ + TargetHost: testlib.Ptr("host-1"), + StepResults: []v1alpha1.StepResult{ + {StepName: "some-step", Activations: map[string]float64{}}, + }, + }, + expected: "Selected host: host-1.", + }, + { + name: "all hosts survive all steps", + result: &v1alpha1.DecisionResult{ + RawInWeights: map[string]float64{ + "host-a": 1.0, + "host-b": 0.5, + }, + StepResults: []v1alpha1.StepResult{ + { + StepName: "noop", + Activations: map[string]float64{ + "host-a": 1.0, + "host-b": 0.5, + }, + }, + }, + TargetHost: testlib.Ptr("host-a"), + }, + expected: "Started with 2 host(s).\n\n\n2 hosts remaining (host-a, host-b)\n\nSelected host: host-a.", + }, + { + name: "large cluster caps host lists", + result: func() *v1alpha1.DecisionResult { + weights := make(map[string]float64, 50) + for i := range 50 { + weights[fmt.Sprintf("host-%03d", i)] = 1.0 + } + // Step filters out 20 hosts (keep host-000..host-029). + surviving := make(map[string]float64, 30) + for i := range 30 { + surviving[fmt.Sprintf("host-%03d", i)] = 1.0 + } + return &v1alpha1.DecisionResult{ + RawInWeights: weights, + StepResults: []v1alpha1.StepResult{ + { + StepName: "filter_big", + Activations: surviving, + }, + }, + TargetHost: testlib.Ptr("host-000"), + } + }(), + expected: "Started with 50 host(s).\n\n" + + "filter_big filtered out host-030, host-031, host-032, host-033, host-034, host-035, host-036, host-037, host-038, host-039 (and 10 more)\n\n" + + "30 hosts remaining (host-000, host-001, host-002, host-003, host-004, host-005, host-006, host-007, host-008, host-009 (and 20 more))\n\n" + + "Selected host: host-000.", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := generateExplanation(tt.result, tt.err) + if got != tt.expected { + t.Errorf("generateExplanation() =\n%q\nwant:\n%q", got, tt.expected) + } + }) + } +} + +func newTestScheme(t *testing.T) *runtime.Scheme { + t.Helper() + scheme := runtime.NewScheme() + if err := v1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("Failed to add v1alpha1 scheme: %v", err) + } + return scheme +} + +func TestHistoryManager_Upsert(t *testing.T) { + tests := []struct { + name string + // setup returns a fake client and any pre-existing objects. + setup func(t *testing.T) client.Client + // decision to upsert. + decision *v1alpha1.Decision + pipelineErr error + // assertions on the history list (archived entries only). + expectHistoryLen int + // assertions on the current decision. + expectTargetHost *string + expectSuccessful bool + expectCondStatus metav1.ConditionStatus + expectReason string + checkExplanation func(t *testing.T, explanation string) + checkCurrentHosts func(t *testing.T, hosts []string) + }{ + { + name: "create new history", + setup: func(t *testing.T) client.Client { + return fake.NewClientBuilder(). + WithScheme(newTestScheme(t)). + WithStatusSubresource(&v1alpha1.History{}). + Build() + }, + decision: &v1alpha1.Decision{ + Spec: v1alpha1.DecisionSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + ResourceID: "uuid-1", + PipelineRef: corev1.ObjectReference{Name: "nova-pipeline"}, + Intent: v1alpha1.SchedulingIntentUnknown, + }, + Status: v1alpha1.DecisionStatus{ + Result: &v1alpha1.DecisionResult{ + TargetHost: testlib.Ptr("compute-1"), + }, + }, + }, + expectHistoryLen: 0, + expectTargetHost: testlib.Ptr("compute-1"), + expectSuccessful: true, + expectCondStatus: metav1.ConditionTrue, + expectReason: v1alpha1.HistoryReasonSchedulingSucceeded, + checkExplanation: func(t *testing.T, explanation string) { + if !strings.Contains(explanation, "Selected host: compute-1") { + t.Errorf("expected explanation to contain selected host, got: %q", explanation) + } + }, + }, + { + name: "update existing history with pre-existing entries", + setup: func(t *testing.T) client.Client { + scheme := newTestScheme(t) + existing := &v1alpha1.History{ + ObjectMeta: metav1.ObjectMeta{Name: "nova-uuid-2"}, + Spec: v1alpha1.HistorySpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + ResourceID: "uuid-2", + }, + Status: v1alpha1.HistoryStatus{ + History: []v1alpha1.SchedulingHistoryEntry{ + {Timestamp: metav1.Now(), PipelineRef: corev1.ObjectReference{Name: "old-pipeline"}}, + }, + }, + } + return fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(existing). + WithStatusSubresource(&v1alpha1.History{}). + Build() + }, + decision: &v1alpha1.Decision{ + Spec: v1alpha1.DecisionSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + ResourceID: "uuid-2", + PipelineRef: corev1.ObjectReference{Name: "nova-pipeline"}, + Intent: v1alpha1.SchedulingIntentUnknown, + }, + Status: v1alpha1.DecisionStatus{ + Result: &v1alpha1.DecisionResult{ + TargetHost: testlib.Ptr("compute-2"), + }, + }, + }, + expectHistoryLen: 1, // pre-existing entry preserved, no current to archive + expectTargetHost: testlib.Ptr("compute-2"), + expectSuccessful: true, + expectCondStatus: metav1.ConditionTrue, + expectReason: v1alpha1.HistoryReasonSchedulingSucceeded, + }, + { + name: "archive current to history on second upsert", + setup: func(t *testing.T) client.Client { + scheme := newTestScheme(t) + existing := &v1alpha1.History{ + ObjectMeta: metav1.ObjectMeta{Name: "nova-uuid-3"}, + Spec: v1alpha1.HistorySpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + ResourceID: "uuid-3", + }, + Status: v1alpha1.HistoryStatus{ + Current: v1alpha1.CurrentDecision{ + Timestamp: metav1.Now(), + PipelineRef: corev1.ObjectReference{Name: "old-pipeline"}, + Intent: v1alpha1.SchedulingIntentUnknown, + Successful: true, + TargetHost: testlib.Ptr("old-host"), + }, + }, + } + return fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(existing). + WithStatusSubresource(&v1alpha1.History{}). + Build() + }, + decision: &v1alpha1.Decision{ + Spec: v1alpha1.DecisionSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + ResourceID: "uuid-3", + PipelineRef: corev1.ObjectReference{Name: "new-pipeline"}, + Intent: v1alpha1.SchedulingIntentUnknown, + }, + Status: v1alpha1.DecisionStatus{ + Result: &v1alpha1.DecisionResult{ + TargetHost: testlib.Ptr("new-host"), + }, + }, + }, + expectHistoryLen: 1, // old current archived + expectTargetHost: testlib.Ptr("new-host"), + expectSuccessful: true, + expectCondStatus: metav1.ConditionTrue, + expectReason: v1alpha1.HistoryReasonSchedulingSucceeded, + }, + { + name: "pipeline error", + setup: func(t *testing.T) client.Client { + return fake.NewClientBuilder(). + WithScheme(newTestScheme(t)). + WithStatusSubresource(&v1alpha1.History{}). + Build() + }, + decision: &v1alpha1.Decision{ + Spec: v1alpha1.DecisionSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainCinder, + ResourceID: "vol-1", + PipelineRef: corev1.ObjectReference{Name: "cinder-pipeline"}, + Intent: v1alpha1.SchedulingIntentUnknown, + }, + }, + pipelineErr: errors.New("no hosts available"), + expectHistoryLen: 0, + expectTargetHost: nil, + expectSuccessful: false, + expectCondStatus: metav1.ConditionFalse, + expectReason: v1alpha1.HistoryReasonPipelineRunFailed, + checkExplanation: func(t *testing.T, explanation string) { + if !strings.Contains(explanation, "no hosts available") { + t.Errorf("expected explanation to contain error text, got: %q", explanation) + } + }, + }, + { + name: "no host found", + setup: func(t *testing.T) client.Client { + return fake.NewClientBuilder(). + WithScheme(newTestScheme(t)). + WithStatusSubresource(&v1alpha1.History{}). + Build() + }, + decision: &v1alpha1.Decision{ + Spec: v1alpha1.DecisionSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + ResourceID: "uuid-nohost", + PipelineRef: corev1.ObjectReference{Name: "nova-pipeline"}, + Intent: v1alpha1.SchedulingIntentUnknown, + }, + Status: v1alpha1.DecisionStatus{ + Result: &v1alpha1.DecisionResult{ + // Pipeline succeeded but no target host selected (all filtered out). + OrderedHosts: []string{}, + }, + }, + }, + expectHistoryLen: 0, + expectTargetHost: nil, + expectSuccessful: false, + expectCondStatus: metav1.ConditionFalse, + expectReason: v1alpha1.HistoryReasonNoHostFound, + }, + { + name: "history capped at 10 entries", + setup: func(t *testing.T) client.Client { + scheme := newTestScheme(t) + entries := make([]v1alpha1.SchedulingHistoryEntry, 10) + for i := range entries { + entries[i] = v1alpha1.SchedulingHistoryEntry{ + Timestamp: metav1.Now(), + PipelineRef: corev1.ObjectReference{Name: fmt.Sprintf("pipeline-%d", i)}, + } + } + existing := &v1alpha1.History{ + ObjectMeta: metav1.ObjectMeta{Name: "manila-share-1"}, + Spec: v1alpha1.HistorySpec{ + SchedulingDomain: v1alpha1.SchedulingDomainManila, + ResourceID: "share-1", + }, + Status: v1alpha1.HistoryStatus{ + Current: v1alpha1.CurrentDecision{ + Timestamp: metav1.Now(), + PipelineRef: corev1.ObjectReference{Name: "prev-pipeline"}, + Successful: true, + TargetHost: testlib.Ptr("old-backend"), + }, + History: entries, + }, + } + return fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(existing). + WithStatusSubresource(&v1alpha1.History{}). + Build() + }, + decision: &v1alpha1.Decision{ + Spec: v1alpha1.DecisionSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainManila, + ResourceID: "share-1", + PipelineRef: corev1.ObjectReference{Name: "manila-pipeline"}, + Intent: v1alpha1.SchedulingIntentUnknown, + }, + Status: v1alpha1.DecisionStatus{ + Result: &v1alpha1.DecisionResult{ + TargetHost: testlib.Ptr("backend-1"), + }, + }, + }, + expectHistoryLen: 10, // 10 existing + 1 archived current, capped to 10 + expectTargetHost: testlib.Ptr("backend-1"), + expectSuccessful: true, + expectCondStatus: metav1.ConditionTrue, + expectReason: v1alpha1.HistoryReasonSchedulingSucceeded, + }, + { + name: "ordered hosts capped at 3", + setup: func(t *testing.T) client.Client { + return fake.NewClientBuilder(). + WithScheme(newTestScheme(t)). + WithStatusSubresource(&v1alpha1.History{}). + Build() + }, + decision: &v1alpha1.Decision{ + Spec: v1alpha1.DecisionSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + ResourceID: "uuid-cap", + PipelineRef: corev1.ObjectReference{Name: "nova-pipeline"}, + Intent: v1alpha1.SchedulingIntentUnknown, + }, + Status: v1alpha1.DecisionStatus{ + Result: &v1alpha1.DecisionResult{ + TargetHost: testlib.Ptr("h1"), + OrderedHosts: []string{"h1", "h2", "h3", "h4", "h5"}, + }, + }, + }, + expectHistoryLen: 0, + expectTargetHost: testlib.Ptr("h1"), + expectSuccessful: true, + expectCondStatus: metav1.ConditionTrue, + expectReason: v1alpha1.HistoryReasonSchedulingSucceeded, + checkCurrentHosts: func(t *testing.T, hosts []string) { + if len(hosts) != 3 { + t.Errorf("ordered hosts length = %d, want 3", len(hosts)) + } + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + c := tt.setup(t) + hm := HistoryClient{Client: c} + + err := hm.CreateOrUpdateHistory(context.Background(), tt.decision, nil, tt.pipelineErr) + if err != nil { + t.Fatalf("Upsert() returned error: %v", err) + } + + // Fetch the history CRD. + name := getName(tt.decision.Spec.SchedulingDomain, tt.decision.Spec.ResourceID) + var history v1alpha1.History + if err := c.Get(context.Background(), client.ObjectKey{Name: name}, &history); err != nil { + t.Fatalf("Failed to get history: %v", err) + } + + // Verify spec. + if history.Spec.SchedulingDomain != tt.decision.Spec.SchedulingDomain { + t.Errorf("spec.schedulingDomain = %q, want %q", history.Spec.SchedulingDomain, tt.decision.Spec.SchedulingDomain) + } + if history.Spec.ResourceID != tt.decision.Spec.ResourceID { + t.Errorf("spec.resourceID = %q, want %q", history.Spec.ResourceID, tt.decision.Spec.ResourceID) + } + + // Verify history list length. + if len(history.Status.History) != tt.expectHistoryLen { + t.Errorf("status.history has %d entries, want %d", len(history.Status.History), tt.expectHistoryLen) + } + + // Verify current decision. + current := history.Status.Current + if current.Successful != tt.expectSuccessful { + t.Errorf("current.successful = %v, want %v", current.Successful, tt.expectSuccessful) + } + if current.PipelineRef.Name != tt.decision.Spec.PipelineRef.Name { + t.Errorf("current.pipelineRef = %q, want %q", current.PipelineRef.Name, tt.decision.Spec.PipelineRef.Name) + } + + // Verify target host. + if tt.expectTargetHost == nil { + if current.TargetHost != nil { + t.Errorf("current.targetHost = %q, want nil", *current.TargetHost) + } + } else { + if current.TargetHost == nil { + t.Errorf("current.targetHost = nil, want %q", *tt.expectTargetHost) + } else if *current.TargetHost != *tt.expectTargetHost { + t.Errorf("current.targetHost = %q, want %q", *current.TargetHost, *tt.expectTargetHost) + } + } + + // Verify condition. + if len(history.Status.Conditions) == 0 { + t.Fatal("expected at least one condition") + } + readyCond := history.Status.Conditions[0] + if readyCond.Status != tt.expectCondStatus { + t.Errorf("condition status = %q, want %q", readyCond.Status, tt.expectCondStatus) + } + if readyCond.Reason != tt.expectReason { + t.Errorf("condition reason = %q, want %q", readyCond.Reason, tt.expectReason) + } + + // Verify explanation. + if tt.checkExplanation != nil { + tt.checkExplanation(t, current.Explanation) + } + + // Verify ordered hosts on current. + if tt.checkCurrentHosts != nil { + tt.checkCurrentHosts(t, current.OrderedHosts) + } + }) + } +} + +func TestHistoryManager_UpsertFromGoroutine(t *testing.T) { + c := fake.NewClientBuilder(). + WithScheme(newTestScheme(t)). + WithStatusSubresource(&v1alpha1.History{}). + Build() + hm := HistoryClient{Client: c} + + decision := &v1alpha1.Decision{ + Spec: v1alpha1.DecisionSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + ResourceID: "async-uuid", + PipelineRef: corev1.ObjectReference{Name: "nova-pipeline"}, + Intent: v1alpha1.SchedulingIntentUnknown, + }, + Status: v1alpha1.DecisionStatus{ + Result: &v1alpha1.DecisionResult{ + TargetHost: testlib.Ptr("compute-async"), + }, + }, + } + + // Mirrors the pattern used in pipeline controllers. + var wg sync.WaitGroup + var upsertErr error + wg.Go(func() { + upsertErr = hm.CreateOrUpdateHistory(context.Background(), decision, nil, nil) + }) + + // Poll for history creation. + var histories v1alpha1.HistoryList + deadline := time.Now().Add(2 * time.Second) + for { + if err := c.List(context.Background(), &histories); err != nil { + t.Fatalf("Failed to list histories: %v", err) + } + if len(histories.Items) > 0 { + break + } + if time.Now().After(deadline) { + t.Fatal("timed out waiting for async history creation") + } + time.Sleep(5 * time.Millisecond) + } + + wg.Wait() + if upsertErr != nil { + t.Fatalf("Upsert() returned error: %v", upsertErr) + } + + got := histories.Items[0].Status.Current.TargetHost + if got == nil || *got != "compute-async" { + t.Errorf("target host = %v, want %q", got, "compute-async") + } +} + +func TestHistoryManager_Delete(t *testing.T) { + tests := []struct { + name string + domain v1alpha1.SchedulingDomain + resourceID string + preCreate bool + }{ + { + name: "delete existing", + domain: v1alpha1.SchedulingDomainNova, + resourceID: "uuid-del", + preCreate: true, + }, + { + name: "delete non-existing is no-op", + domain: v1alpha1.SchedulingDomainCinder, + resourceID: "does-not-exist", + preCreate: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + scheme := newTestScheme(t) + var objects []client.Object + if tt.preCreate { + objects = append(objects, &v1alpha1.History{ + ObjectMeta: metav1.ObjectMeta{ + Name: getName(tt.domain, tt.resourceID), + }, + Spec: v1alpha1.HistorySpec{ + SchedulingDomain: tt.domain, + ResourceID: tt.resourceID, + }, + }) + } + + c := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(objects...). + Build() + hm := HistoryClient{Client: c} + + err := hm.Delete(context.Background(), tt.domain, tt.resourceID) + if err != nil { + t.Fatalf("Delete() returned error: %v", err) + } + + // Verify history is gone. + var histories v1alpha1.HistoryList + if err := c.List(context.Background(), &histories); err != nil { + t.Fatalf("Failed to list histories: %v", err) + } + if len(histories.Items) != 0 { + t.Errorf("expected 0 histories, got %d", len(histories.Items)) + } + }) + } +} diff --git a/internal/scheduling/lib/pipeline_controller.go b/internal/scheduling/lib/pipeline_controller.go index a435133e4..94e61ed45 100644 --- a/internal/scheduling/lib/pipeline_controller.go +++ b/internal/scheduling/lib/pipeline_controller.go @@ -30,6 +30,8 @@ type BasePipelineController[PipelineType any] struct { client.Client // The scheduling domain to scope resources to. SchedulingDomain v1alpha1.SchedulingDomain + // Manager for creating, updating, and deleting History CRDs. + HistoryManager HistoryClient } // Handle the startup of the manager by initializing the pipeline map. diff --git a/internal/scheduling/machines/filter_weigher_pipeline_controller.go b/internal/scheduling/machines/filter_weigher_pipeline_controller.go index 5dceb4d3a..d76203812 100644 --- a/internal/scheduling/machines/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/machines/filter_weigher_pipeline_controller.go @@ -99,12 +99,6 @@ func (c *FilterWeigherPipelineController) ProcessNewMachine(ctx context.Context, if !ok { return fmt.Errorf("pipeline %s not configured", decision.Spec.PipelineRef.Name) } - if pipelineConf.Spec.CreateDecisions { - if err := c.Create(ctx, decision); err != nil { - return err - } - } - old := decision.DeepCopy() err := c.process(ctx, decision) if err != nil { meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ @@ -121,10 +115,9 @@ func (c *FilterWeigherPipelineController) ProcessNewMachine(ctx context.Context, Message: "pipeline run succeeded", }) } - if pipelineConf.Spec.CreateDecisions { - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - return err + if pipelineConf.Spec.CreateHistory { + if upsertErr := c.HistoryManager.CreateOrUpdateHistory(ctx, decision, nil, err); upsertErr != nil { + ctrl.LoggerFrom(ctx).Error(upsertErr, "failed to create/update history") } } return err @@ -215,20 +208,12 @@ func (c *FilterWeigherPipelineController) handleMachine() handler.EventHandler { } }, DeleteFunc: func(ctx context.Context, evt event.DeleteEvent, queue workqueue.TypedRateLimitingInterface[reconcile.Request]) { - // Delete the associated decision(s). - log := ctrl.LoggerFrom(ctx) + c.processMu.Lock() + defer c.processMu.Unlock() machine := evt.Object.(*ironcorev1alpha1.Machine) - var decisions v1alpha1.DecisionList - if err := c.List(ctx, &decisions); err != nil { - log.Error(err, "failed to list decisions for deleted machine") - return - } - for _, decision := range decisions.Items { - if decision.Spec.MachineRef.Name == machine.Name && decision.Spec.MachineRef.Namespace == machine.Namespace { - if err := c.Delete(ctx, &decision); err != nil { - log.Error(err, "failed to delete decision for deleted machine") - } - } + if err := c.HistoryManager.Delete(ctx, v1alpha1.SchedulingDomainMachines, machine.Name); err != nil { + log := ctrl.LoggerFrom(ctx) + log.Error(err, "failed to delete history CRD for machine", "machine", machine.Name) } }, } @@ -237,6 +222,7 @@ func (c *FilterWeigherPipelineController) handleMachine() handler.EventHandler { func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { c.Initializer = c c.SchedulingDomain = v1alpha1.SchedulingDomainMachines + c.HistoryManager = lib.HistoryClient{Client: mcl, Recorder: mgr.GetEventRecorder("cortex-machines-scheduler")} if err := mgr.Add(manager.RunnableFunc(c.InitAllPipelines)); err != nil { return err } diff --git a/internal/scheduling/machines/filter_weigher_pipeline_controller_test.go b/internal/scheduling/machines/filter_weigher_pipeline_controller_test.go index 58c574f45..bc2e0722a 100644 --- a/internal/scheduling/machines/filter_weigher_pipeline_controller_test.go +++ b/internal/scheduling/machines/filter_weigher_pipeline_controller_test.go @@ -6,6 +6,7 @@ package machines import ( "context" "testing" + "time" "github.com/cobaltcore-dev/cortex/api/external/ironcore" ironcorev1alpha1 "github.com/cobaltcore-dev/cortex/api/external/ironcore/v1alpha1" @@ -289,14 +290,14 @@ func TestFilterWeigherPipelineController_ProcessNewMachine(t *testing.T) { machine *ironcorev1alpha1.Machine machinePools []ironcorev1alpha1.MachinePool pipelineConfig *v1alpha1.Pipeline - createDecisions bool + createHistory bool expectError bool - expectDecisionCreated bool + expectHistoryCreated bool expectMachinePoolAssigned bool expectTargetHost string }{ { - name: "successful machine processing with decision creation", + name: "successful machine processing with history creation", machine: &ironcorev1alpha1.Machine{ ObjectMeta: metav1.ObjectMeta{ Name: "test-machine", @@ -321,22 +322,22 @@ func TestFilterWeigherPipelineController_ProcessNewMachine(t *testing.T) { Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainMachines, - CreateDecisions: true, + CreateHistory: true, Filters: []v1alpha1.FilterSpec{}, Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: true, + createHistory: true, expectError: false, - expectDecisionCreated: true, + expectHistoryCreated: true, expectMachinePoolAssigned: true, expectTargetHost: "pool1", }, { - name: "successful machine processing without decision creation", + name: "successful machine processing without history creation", machine: &ironcorev1alpha1.Machine{ ObjectMeta: metav1.ObjectMeta{ - Name: "test-machine-no-decision", + Name: "test-machine-no-history", Namespace: "default", }, Spec: ironcorev1alpha1.MachineSpec{ @@ -355,14 +356,14 @@ func TestFilterWeigherPipelineController_ProcessNewMachine(t *testing.T) { Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainMachines, - CreateDecisions: false, + CreateHistory: false, Filters: []v1alpha1.FilterSpec{}, Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: false, + createHistory: false, expectError: false, - expectDecisionCreated: false, + expectHistoryCreated: false, expectMachinePoolAssigned: true, expectTargetHost: "pool1", }, @@ -380,7 +381,7 @@ func TestFilterWeigherPipelineController_ProcessNewMachine(t *testing.T) { machinePools: []ironcorev1alpha1.MachinePool{}, pipelineConfig: nil, expectError: true, - expectDecisionCreated: false, + expectHistoryCreated: false, expectMachinePoolAssigned: false, }, { @@ -402,14 +403,14 @@ func TestFilterWeigherPipelineController_ProcessNewMachine(t *testing.T) { Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainMachines, - CreateDecisions: true, + CreateHistory: true, Filters: []v1alpha1.FilterSpec{}, Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: true, + createHistory: true, expectError: true, - expectDecisionCreated: true, // Decision is created but processing fails + expectHistoryCreated: true, // Decision is created but processing fails expectMachinePoolAssigned: false, }, } @@ -427,13 +428,14 @@ func TestFilterWeigherPipelineController_ProcessNewMachine(t *testing.T) { client := fake.NewClientBuilder(). WithScheme(scheme). WithRuntimeObjects(objects...). - WithStatusSubresource(&v1alpha1.Decision{}). + WithStatusSubresource(&v1alpha1.Decision{}, &v1alpha1.History{}). Build() controller := &FilterWeigherPipelineController{ BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[ironcore.MachinePipelineRequest]]{ Pipelines: map[string]lib.FilterWeigherPipeline[ironcore.MachinePipelineRequest]{}, PipelineConfigs: map[string]v1alpha1.Pipeline{}, + HistoryManager: lib.HistoryClient{Client: client}, }, Monitor: lib.FilterWeigherPipelineMonitor{}, } @@ -456,70 +458,29 @@ func TestFilterWeigherPipelineController_ProcessNewMachine(t *testing.T) { return } - // Check if decision was created (if expected) - if tt.expectDecisionCreated { - var decisions v1alpha1.DecisionList - err := client.List(context.Background(), &decisions) - if err != nil { - t.Errorf("Failed to list decisions: %v", err) - return - } - - found := false - for _, decision := range decisions.Items { - if decision.Spec.MachineRef != nil && - decision.Spec.MachineRef.Name == tt.machine.Name && - decision.Spec.MachineRef.Namespace == tt.machine.Namespace { - found = true - - // Verify decision properties - if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainMachines { - t.Errorf("expected scheduling domain %q, got %q", v1alpha1.SchedulingDomainMachines, decision.Spec.SchedulingDomain) - } - if decision.Spec.ResourceID != tt.machine.Name { - t.Errorf("expected resource ID %q, got %q", tt.machine.Name, decision.Spec.ResourceID) - } - if decision.Spec.PipelineRef.Name != "machines-scheduler" { - t.Errorf("expected pipeline ref %q, got %q", "machines-scheduler", decision.Spec.PipelineRef.Name) - } - - // Check if result was set (only for successful cases) - if !tt.expectError && tt.expectTargetHost != "" { - if decision.Status.Result == nil { - t.Error("expected decision result to be set") - return - } - if decision.Status.Result.TargetHost == nil { - t.Error("expected target host to be set") - return - } - if *decision.Status.Result.TargetHost != tt.expectTargetHost { - t.Errorf("expected target host %q, got %q", tt.expectTargetHost, *decision.Status.Result.TargetHost) - } - } + // Check if history CRD was created when expected + if tt.expectHistoryCreated { + var histories v1alpha1.HistoryList + deadline := time.Now().Add(2 * time.Second) + for { + if err := client.List(context.Background(), &histories); err != nil { + t.Fatalf("Failed to list histories: %v", err) + } + if len(histories.Items) > 0 { break } - } - - if !found { - t.Error("expected decision to be created but was not found") + if time.Now().After(deadline) { + t.Fatal("timed out waiting for history CRD to be created") + } + time.Sleep(5 * time.Millisecond) } } else { - // Check that no decisions were created - var decisions v1alpha1.DecisionList - err := client.List(context.Background(), &decisions) - if err != nil { - t.Errorf("Failed to list decisions: %v", err) - return + var histories v1alpha1.HistoryList + if err := client.List(context.Background(), &histories); err != nil { + t.Fatalf("Failed to list histories: %v", err) } - - for _, decision := range decisions.Items { - if decision.Spec.MachineRef != nil && - decision.Spec.MachineRef.Name == tt.machine.Name && - decision.Spec.MachineRef.Namespace == tt.machine.Namespace { - t.Error("expected no decision to be created but found one") - break - } + if len(histories.Items) != 0 { + t.Error("Expected no history CRD but found one") } } diff --git a/internal/scheduling/manila/external_scheduler_api.go b/internal/scheduling/manila/external_scheduler_api.go index 2ad5a9265..64e96c0bd 100644 --- a/internal/scheduling/manila/external_scheduler_api.go +++ b/internal/scheduling/manila/external_scheduler_api.go @@ -149,6 +149,7 @@ func (httpAPI *httpAPI) ManilaExternalScheduler(w http.ResponseWriter, r *http.R }, ResourceID: "", // TODO model out the spec. ManilaRaw: &raw, + Intent: v1alpha1.SchedulingIntentUnknown, }, } ctx := r.Context() diff --git a/internal/scheduling/manila/filter_weigher_pipeline_controller.go b/internal/scheduling/manila/filter_weigher_pipeline_controller.go index faa13b121..6ab938511 100644 --- a/internal/scheduling/manila/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/manila/filter_weigher_pipeline_controller.go @@ -78,12 +78,6 @@ func (c *FilterWeigherPipelineController) ProcessNewDecisionFromAPI(ctx context. if !ok { return fmt.Errorf("pipeline %s not configured", decision.Spec.PipelineRef.Name) } - if pipelineConf.Spec.CreateDecisions { - if err := c.Create(ctx, decision); err != nil { - return err - } - } - old := decision.DeepCopy() err := c.process(ctx, decision) if err != nil { meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ @@ -100,10 +94,9 @@ func (c *FilterWeigherPipelineController) ProcessNewDecisionFromAPI(ctx context. Message: "pipeline run succeeded", }) } - if pipelineConf.Spec.CreateDecisions { - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - return err + if pipelineConf.Spec.CreateHistory { + if upsertErr := c.HistoryManager.CreateOrUpdateHistory(ctx, decision, nil, err); upsertErr != nil { + ctrl.LoggerFrom(ctx).Error(upsertErr, "failed to create/update history") } } return err @@ -155,6 +148,7 @@ func (c *FilterWeigherPipelineController) InitPipeline( func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { c.Initializer = c c.SchedulingDomain = v1alpha1.SchedulingDomainManila + c.HistoryManager = lib.HistoryClient{Client: mcl, Recorder: mgr.GetEventRecorder("cortex-manila-scheduler")} if err := mgr.Add(manager.RunnableFunc(c.InitAllPipelines)); err != nil { return err } diff --git a/internal/scheduling/manila/filter_weigher_pipeline_controller_test.go b/internal/scheduling/manila/filter_weigher_pipeline_controller_test.go index a9fc2df1d..e16c55c27 100644 --- a/internal/scheduling/manila/filter_weigher_pipeline_controller_test.go +++ b/internal/scheduling/manila/filter_weigher_pipeline_controller_test.go @@ -7,6 +7,7 @@ import ( "context" "encoding/json" "testing" + "time" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" @@ -245,13 +246,13 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) } tests := []struct { - name string - decision *v1alpha1.Decision - pipelineConfig *v1alpha1.Pipeline - createDecisions bool - expectError bool - expectDecisionCreated bool - expectResult bool + name string + decision *v1alpha1.Decision + pipelineConfig *v1alpha1.Pipeline + createHistory bool + expectError bool + expectHistoryCreated bool + expectResult bool }{ { name: "successful decision processing with creation", @@ -262,6 +263,7 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) }, Spec: v1alpha1.DecisionSpec{ SchedulingDomain: v1alpha1.SchedulingDomainManila, + ResourceID: "test-uuid-1", PipelineRef: corev1.ObjectReference{ Name: "test-pipeline", }, @@ -277,15 +279,15 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainManila, - CreateDecisions: true, + CreateHistory: true, Filters: []v1alpha1.FilterSpec{}, Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: true, - expectError: false, - expectDecisionCreated: true, - expectResult: true, + createHistory: true, + expectError: false, + expectHistoryCreated: true, + expectResult: true, }, { name: "successful decision processing without creation", @@ -311,15 +313,15 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainManila, - CreateDecisions: false, + CreateHistory: false, Filters: []v1alpha1.FilterSpec{}, Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: false, - expectError: false, - expectDecisionCreated: false, - expectResult: true, + createHistory: false, + expectError: false, + expectHistoryCreated: false, + expectResult: true, }, { name: "pipeline not configured", @@ -338,10 +340,10 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) }, }, }, - pipelineConfig: nil, - expectError: true, - expectDecisionCreated: false, - expectResult: false, + pipelineConfig: nil, + expectError: true, + expectHistoryCreated: false, + expectResult: false, }, { name: "decision without manilaRaw spec", @@ -365,15 +367,15 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainManila, - CreateDecisions: true, + CreateHistory: true, Filters: []v1alpha1.FilterSpec{}, Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: true, - expectError: true, - expectDecisionCreated: false, - expectResult: false, + createHistory: true, + expectError: true, + expectHistoryCreated: true, + expectResult: false, }, } @@ -387,7 +389,7 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) client := fake.NewClientBuilder(). WithScheme(scheme). WithObjects(objects...). - WithStatusSubresource(&v1alpha1.Decision{}). + WithStatusSubresource(&v1alpha1.Decision{}, &v1alpha1.History{}). Build() controller := &FilterWeigherPipelineController{ @@ -395,6 +397,7 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Client: client, Pipelines: make(map[string]lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]), PipelineConfigs: make(map[string]v1alpha1.Pipeline), + HistoryManager: lib.HistoryClient{Client: client}, }, Monitor: lib.FilterWeigherPipelineMonitor{}, } @@ -417,41 +420,23 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) t.Errorf("Expected no error but got: %v", err) } - // Check if decision was created (if expected) - if tt.expectDecisionCreated { - var decisions v1alpha1.DecisionList - err := client.List(context.Background(), &decisions) - if err != nil { - t.Errorf("Failed to list decisions: %v", err) - return - } - - found := false - for _, decision := range decisions.Items { - if decision.Spec.SchedulingDomain == v1alpha1.SchedulingDomainManila { - found = true - - // Verify decision properties - if decision.Spec.PipelineRef.Name != "test-pipeline" { - t.Errorf("expected pipeline ref %q, got %q", "test-pipeline", decision.Spec.PipelineRef.Name) - } - - // Check if result was set - if tt.expectResult { - if decision.Status.Result == nil { - t.Error("expected decision result to be set") - return - } - } + // Check if history CRD was created when expected + if tt.expectHistoryCreated { + var histories v1alpha1.HistoryList + deadline := time.Now().Add(2 * time.Second) + for { + if err := client.List(context.Background(), &histories); err != nil { + t.Fatalf("Failed to list histories: %v", err) + } + if len(histories.Items) > 0 { break } - } - - if !found { - t.Error("expected decision to be created but was not found") + if time.Now().After(deadline) { + t.Fatal("timed out waiting for history CRD to be created") + } + time.Sleep(5 * time.Millisecond) } } else if !tt.expectError { - // For cases without creation, check that the decision has the right status if tt.expectResult && tt.decision.Status.Result == nil { t.Error("expected decision result to be set in original decision object") } diff --git a/internal/scheduling/manila/decisions_cleanup.go b/internal/scheduling/manila/history_cleanup.go similarity index 77% rename from internal/scheduling/manila/decisions_cleanup.go rename to internal/scheduling/manila/history_cleanup.go index ee662db4c..824755261 100644 --- a/internal/scheduling/manila/decisions_cleanup.go +++ b/internal/scheduling/manila/history_cleanup.go @@ -21,15 +21,15 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) -type DecisionsCleanupConfig struct { +type HistoryCleanupConfig struct { // Secret ref to keystone credentials stored in a k8s secret. KeystoneSecretRef corev1.SecretReference `json:"keystoneSecretRef"` // Secret ref to SSO credentials stored in a k8s secret, if applicable. SSOSecretRef *corev1.SecretReference `json:"ssoSecretRef"` } -// Delete all decisions for manila shares that have been deleted. -func DecisionsCleanup(ctx context.Context, client client.Client, conf DecisionsCleanupConfig) error { +// Delete all history entries for manila shares that have been deleted. +func HistoryCleanup(ctx context.Context, client client.Client, conf HistoryCleanupConfig) error { var authenticatedHTTP = http.DefaultClient if conf.SSOSecretRef != nil { var err error @@ -110,23 +110,23 @@ func DecisionsCleanup(ctx context.Context, client client.Client, conf DecisionsC sharesByID[share.ID] = struct{}{} } - // List all decisions and delete those whose share no longer exists. - decisionList := &v1alpha1.DecisionList{} - if err := client.List(ctx, decisionList); err != nil { + // List all history and delete those whose share no longer exists. + historyList := &v1alpha1.HistoryList{} + if err := client.List(ctx, historyList); err != nil { return err } - for _, decision := range decisionList.Items { - // Skip non-manila decisions. - if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainManila { + for _, history := range historyList.Items { + // Skip non-manila histories. + if history.Spec.SchedulingDomain != v1alpha1.SchedulingDomainManila { continue } - // Skip decisions for which the share still exists. - if _, ok := sharesByID[decision.Spec.ResourceID]; ok { + // Skip histories for which the share still exists. + if _, ok := sharesByID[history.Spec.ResourceID]; ok { continue } - // Delete the decision since the share no longer exists. - slog.Info("deleting decision for deleted share", "decision", decision.Name, "shareID", decision.Spec.ResourceID) - if err := client.Delete(ctx, &decision); err != nil { + // Delete the history entry since the share no longer exists. + slog.Info("deleting history entry for deleted share", "history", history.Name, "shareID", history.Spec.ResourceID) + if err := client.Delete(ctx, &history); err != nil { return err } } diff --git a/internal/scheduling/manila/decisions_cleanup_test.go b/internal/scheduling/manila/history_cleanup_test.go similarity index 83% rename from internal/scheduling/manila/decisions_cleanup_test.go rename to internal/scheduling/manila/history_cleanup_test.go index f978ab2f4..f2ea5f914 100644 --- a/internal/scheduling/manila/decisions_cleanup_test.go +++ b/internal/scheduling/manila/history_cleanup_test.go @@ -33,7 +33,7 @@ func TestCleanupManila(t *testing.T) { tests := []struct { name string - decisions []v1alpha1.Decision + histories []v1alpha1.History expectError bool authError bool endpointError bool @@ -44,45 +44,45 @@ func TestCleanupManila(t *testing.T) { }{ { name: "handle authentication error", - decisions: []v1alpha1.Decision{}, + histories: []v1alpha1.History{}, authError: true, expectError: true, }, { name: "handle endpoint error", - decisions: []v1alpha1.Decision{}, + histories: []v1alpha1.History{}, endpointError: true, expectError: true, }, { name: "handle server error", - decisions: []v1alpha1.Decision{}, + histories: []v1alpha1.History{}, mockServerError: true, expectError: true, }, { name: "handle empty shares case", - decisions: []v1alpha1.Decision{}, + histories: []v1alpha1.History{}, emptySharesError: true, expectError: true, }, { - name: "delete decisions for non-existent shares", - decisions: []v1alpha1.Decision{ + name: "delete history for non-existent shares", + histories: []v1alpha1.History{ { ObjectMeta: metav1.ObjectMeta{ - Name: "decision-existing-share", + Name: "history-existing-share", }, - Spec: v1alpha1.DecisionSpec{ + Spec: v1alpha1.HistorySpec{ SchedulingDomain: v1alpha1.SchedulingDomainManila, ResourceID: "share-exists", }, }, { ObjectMeta: metav1.ObjectMeta{ - Name: "decision-deleted-share", + Name: "history-deleted-share", }, - Spec: v1alpha1.DecisionSpec{ + Spec: v1alpha1.HistorySpec{ SchedulingDomain: v1alpha1.SchedulingDomainManila, ResourceID: "share-deleted", }, @@ -91,26 +91,26 @@ func TestCleanupManila(t *testing.T) { mockShares: []mockShare{ {ID: "share-exists"}, }, - expectedDeleted: []string{"decision-deleted-share"}, + expectedDeleted: []string{"history-deleted-share"}, expectError: false, }, { - name: "keep decisions for existing shares", - decisions: []v1alpha1.Decision{ + name: "keep history for existing shares", + histories: []v1alpha1.History{ { ObjectMeta: metav1.ObjectMeta{ - Name: "decision-share-1", + Name: "history-share-1", }, - Spec: v1alpha1.DecisionSpec{ + Spec: v1alpha1.HistorySpec{ SchedulingDomain: v1alpha1.SchedulingDomainManila, ResourceID: "share-1", }, }, { ObjectMeta: metav1.ObjectMeta{ - Name: "decision-share-2", + Name: "history-share-2", }, - Spec: v1alpha1.DecisionSpec{ + Spec: v1alpha1.HistorySpec{ SchedulingDomain: v1alpha1.SchedulingDomainManila, ResourceID: "share-2", }, @@ -124,22 +124,22 @@ func TestCleanupManila(t *testing.T) { expectError: false, }, { - name: "skip non-manila decisions", - decisions: []v1alpha1.Decision{ + name: "skip non-manila histories", + histories: []v1alpha1.History{ { ObjectMeta: metav1.ObjectMeta{ - Name: "decision-other-type", + Name: "history-other-type", }, - Spec: v1alpha1.DecisionSpec{ + Spec: v1alpha1.HistorySpec{ SchedulingDomain: v1alpha1.SchedulingDomainCinder, ResourceID: "some-resource", }, }, { ObjectMeta: metav1.ObjectMeta{ - Name: "decision-other-operator", + Name: "history-other-operator", }, - Spec: v1alpha1.DecisionSpec{ + Spec: v1alpha1.HistorySpec{ SchedulingDomain: "other-operator", ResourceID: "share-1", }, @@ -153,9 +153,9 @@ func TestCleanupManila(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - objects := make([]client.Object, len(tt.decisions)) - for i := range tt.decisions { - objects[i] = &tt.decisions[i] + objects := make([]client.Object, len(tt.histories)) + for i := range tt.histories { + objects[i] = &tt.histories[i] } // Create mock Manila server @@ -331,13 +331,13 @@ func TestCleanupManila(t *testing.T) { WithScheme(scheme). WithObjects(objects...). Build() - config := DecisionsCleanupConfig{ + config := HistoryCleanupConfig{ KeystoneSecretRef: corev1.SecretReference{ Name: "keystone-secret", Namespace: "default", }, } - err := DecisionsCleanup(context.Background(), client, config) + err := HistoryCleanup(context.Background(), client, config) if tt.expectError && err == nil { t.Error("Expected error but got none") @@ -347,32 +347,32 @@ func TestCleanupManila(t *testing.T) { } if !tt.expectError { - // Verify expected decisions were deleted + // Verify expected history entries were deleted for _, expectedDeleted := range tt.expectedDeleted { - var decision v1alpha1.Decision + var history v1alpha1.History err := client.Get(context.Background(), - types.NamespacedName{Name: expectedDeleted}, &decision) + types.NamespacedName{Name: expectedDeleted}, &history) if err == nil { - t.Errorf("Expected decision %s to be deleted but it still exists", expectedDeleted) + t.Errorf("Expected history %s to be deleted but it still exists", expectedDeleted) } } - // Verify other decisions still exist - for _, originalDecision := range tt.decisions { + // Verify other histories still exist + for _, originalHistory := range tt.histories { shouldBeDeleted := false for _, expectedDeleted := range tt.expectedDeleted { - if originalDecision.Name == expectedDeleted { + if originalHistory.Name == expectedDeleted { shouldBeDeleted = true break } } if !shouldBeDeleted { - var decision v1alpha1.Decision + var history v1alpha1.History err := client.Get(context.Background(), - types.NamespacedName{Name: originalDecision.Name}, &decision) + types.NamespacedName{Name: originalHistory.Name}, &history) if err != nil { - t.Errorf("Expected decision %s to still exist but got error: %v", - originalDecision.Name, err) + t.Errorf("Expected history %s to still exist but got error: %v", + originalHistory.Name, err) } } } @@ -381,7 +381,7 @@ func TestCleanupManila(t *testing.T) { } } -func TestCleanupManilaDecisionsCancel(t *testing.T) { +func TestCleanupManilaHistoryCancel(t *testing.T) { scheme := runtime.NewScheme() if err := v1alpha1.AddToScheme(scheme); err != nil { t.Fatalf("Failed to add scheme: %v", err) @@ -413,7 +413,7 @@ func TestCleanupManilaDecisionsCancel(t *testing.T) { WithObjects(objects...). Build() - config := DecisionsCleanupConfig{ + config := HistoryCleanupConfig{ KeystoneSecretRef: corev1.SecretReference{ Name: "keystone-secret", Namespace: "default", @@ -424,7 +424,7 @@ func TestCleanupManilaDecisionsCancel(t *testing.T) { defer cancel() // This should exit quickly due to context cancellation - if err := DecisionsCleanup(ctx, client, config); err != nil { + if err := HistoryCleanup(ctx, client, config); err != nil { if !errors.Is(err, context.DeadlineExceeded) { t.Errorf("Unexpected error during cleanup: %v", err) } diff --git a/internal/scheduling/nova/external_scheduler_api.go b/internal/scheduling/nova/external_scheduler_api.go index 0210a81b8..216f0dd62 100644 --- a/internal/scheduling/nova/external_scheduler_api.go +++ b/internal/scheduling/nova/external_scheduler_api.go @@ -204,6 +204,7 @@ func (httpAPI *httpAPI) NovaExternalScheduler(w http.ResponseWriter, r *http.Req }, ResourceID: requestData.Spec.Data.InstanceUUID, NovaRaw: &raw, + Intent: v1alpha1.SchedulingIntentUnknown, }, } ctx := r.Context() diff --git a/internal/scheduling/nova/filter_weigher_pipeline_controller.go b/internal/scheduling/nova/filter_weigher_pipeline_controller.go index deb126250..bb9c5bb07 100644 --- a/internal/scheduling/nova/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/nova/filter_weigher_pipeline_controller.go @@ -81,12 +81,6 @@ func (c *FilterWeigherPipelineController) ProcessNewDecisionFromAPI(ctx context. if !ok { return fmt.Errorf("pipeline %s not configured", decision.Spec.PipelineRef.Name) } - if pipelineConf.Spec.CreateDecisions { - if err := c.Create(ctx, decision); err != nil { - return err - } - } - old := decision.DeepCopy() err := c.process(ctx, decision) if err != nil { meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ @@ -103,15 +97,33 @@ func (c *FilterWeigherPipelineController) ProcessNewDecisionFromAPI(ctx context. Message: "pipeline run succeeded", }) } - if pipelineConf.Spec.CreateDecisions { - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - return err - } + if pipelineConf.Spec.CreateHistory { + c.upsertHistory(ctx, decision, err) } return err } +func (c *FilterWeigherPipelineController) upsertHistory(ctx context.Context, decision *v1alpha1.Decision, pipelineErr error) { + log := ctrl.LoggerFrom(ctx) + + var az *string + + if decision.Spec.NovaRaw != nil { + var request api.ExternalSchedulerRequest + err := json.Unmarshal(decision.Spec.NovaRaw.Raw, &request) + if err != nil { + log.Error(err, "failed to unmarshal novaRaw for history, using defaults") + } else { + azStr := request.Spec.Data.AvailabilityZone + az = &azStr + } + } + + if upsertErr := c.HistoryManager.CreateOrUpdateHistory(ctx, decision, az, pipelineErr); upsertErr != nil { + log.Error(upsertErr, "failed to create/update history") + } +} + func (c *FilterWeigherPipelineController) process(ctx context.Context, decision *v1alpha1.Decision) error { log := ctrl.LoggerFrom(ctx) startedAt := time.Now() // So we can measure sync duration. @@ -131,6 +143,13 @@ func (c *FilterWeigherPipelineController) process(ctx context.Context, decision return err } + if intent, err := request.GetIntent(); err != nil { + log.Error(err, "failed to get intent from nova request, using Unknown") + decision.Spec.Intent = v1alpha1.SchedulingIntentUnknown + } else { + decision.Spec.Intent = intent + } + // If necessary gather all placement candidates before filtering. // This will override the hosts and weights in the nova request. pipelineConf, ok := c.PipelineConfigs[decision.Spec.PipelineRef.Name] @@ -180,6 +199,7 @@ func (c *FilterWeigherPipelineController) InitPipeline( func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { c.Initializer = c c.SchedulingDomain = v1alpha1.SchedulingDomainNova + c.HistoryManager = lib.HistoryClient{Client: mcl, Recorder: mgr.GetEventRecorder("cortex-nova-scheduler")} c.gatherer = &candidateGatherer{Client: mcl} if err := mgr.Add(manager.RunnableFunc(c.InitAllPipelines)); err != nil { return err diff --git a/internal/scheduling/nova/filter_weigher_pipeline_controller_test.go b/internal/scheduling/nova/filter_weigher_pipeline_controller_test.go index 287b488d9..752725df8 100644 --- a/internal/scheduling/nova/filter_weigher_pipeline_controller_test.go +++ b/internal/scheduling/nova/filter_weigher_pipeline_controller_test.go @@ -9,6 +9,7 @@ import ( "errors" "strings" "testing" + "time" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -393,17 +394,17 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) } tests := []struct { - name string - decision *v1alpha1.Decision - pipeline *v1alpha1.Pipeline - pipelineConf *v1alpha1.Pipeline - setupPipelineConfigs bool - createDecisions bool - expectError bool - expectResult bool - expectCreatedDecision bool - expectUpdatedStatus bool - errorContains string + name string + decision *v1alpha1.Decision + pipeline *v1alpha1.Pipeline + pipelineConf *v1alpha1.Pipeline + setupPipelineConfigs bool + createHistory bool + expectError bool + expectResult bool + expectHistoryCreated bool + expectUpdatedStatus bool + errorContains string }{ { name: "successful processing with decision creation enabled", @@ -414,6 +415,7 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) }, Spec: v1alpha1.DecisionSpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, + ResourceID: "test-uuid-1", PipelineRef: corev1.ObjectReference{ Name: "test-pipeline", }, @@ -429,7 +431,7 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, - CreateDecisions: true, + CreateHistory: true, Filters: []v1alpha1.FilterSpec{}, Weighers: []v1alpha1.WeigherSpec{}, }, @@ -441,17 +443,17 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, - CreateDecisions: true, + CreateHistory: true, Filters: []v1alpha1.FilterSpec{}, Weighers: []v1alpha1.WeigherSpec{}, }, }, - setupPipelineConfigs: true, - createDecisions: true, - expectError: false, - expectResult: true, - expectCreatedDecision: true, - expectUpdatedStatus: true, + setupPipelineConfigs: true, + createHistory: true, + expectError: false, + expectResult: true, + expectHistoryCreated: true, + expectUpdatedStatus: true, }, { name: "successful processing with decision creation disabled", @@ -462,6 +464,7 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) }, Spec: v1alpha1.DecisionSpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, + ResourceID: "test-uuid-2", PipelineRef: corev1.ObjectReference{ Name: "test-pipeline-no-create", }, @@ -477,7 +480,7 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, - CreateDecisions: false, + CreateHistory: false, Filters: []v1alpha1.FilterSpec{}, Weighers: []v1alpha1.WeigherSpec{}, }, @@ -489,17 +492,17 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, - CreateDecisions: false, + CreateHistory: false, Filters: []v1alpha1.FilterSpec{}, Weighers: []v1alpha1.WeigherSpec{}, }, }, - setupPipelineConfigs: true, - createDecisions: false, - expectError: false, - expectResult: true, - expectCreatedDecision: false, - expectUpdatedStatus: false, + setupPipelineConfigs: true, + createHistory: false, + expectError: false, + expectResult: true, + expectHistoryCreated: false, + expectUpdatedStatus: false, }, { name: "pipeline not configured", @@ -518,14 +521,14 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) }, }, }, - pipeline: nil, - pipelineConf: nil, - setupPipelineConfigs: false, - expectError: true, - expectResult: false, - expectCreatedDecision: false, - expectUpdatedStatus: false, - errorContains: "pipeline nonexistent-pipeline not configured", + pipeline: nil, + pipelineConf: nil, + setupPipelineConfigs: false, + expectError: true, + expectResult: false, + expectHistoryCreated: false, + expectUpdatedStatus: false, + errorContains: "pipeline nonexistent-pipeline not configured", }, { name: "decision without novaRaw spec", @@ -549,7 +552,7 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, - CreateDecisions: true, + CreateHistory: true, Filters: []v1alpha1.FilterSpec{}, Weighers: []v1alpha1.WeigherSpec{}, }, @@ -561,18 +564,18 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, - CreateDecisions: true, + CreateHistory: true, Filters: []v1alpha1.FilterSpec{}, Weighers: []v1alpha1.WeigherSpec{}, }, }, - setupPipelineConfigs: true, - createDecisions: true, - expectError: true, - expectResult: false, - expectCreatedDecision: true, - expectUpdatedStatus: false, - errorContains: "no novaRaw spec defined", + setupPipelineConfigs: true, + createHistory: true, + expectError: true, + expectResult: false, + expectHistoryCreated: true, + expectUpdatedStatus: false, + errorContains: "no novaRaw spec defined", }, { name: "processing fails after decision creation", @@ -599,18 +602,18 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, - CreateDecisions: true, + CreateHistory: true, Filters: []v1alpha1.FilterSpec{}, Weighers: []v1alpha1.WeigherSpec{}, }, }, - setupPipelineConfigs: true, - createDecisions: true, - expectError: true, - expectResult: false, - expectCreatedDecision: true, - expectUpdatedStatus: false, - errorContains: "pipeline not found or not ready", + setupPipelineConfigs: true, + createHistory: true, + expectError: true, + expectResult: false, + expectHistoryCreated: true, + expectUpdatedStatus: false, + errorContains: "pipeline not found or not ready", }, { name: "pipeline not found in runtime map", @@ -637,18 +640,18 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, - CreateDecisions: true, + CreateHistory: true, Filters: []v1alpha1.FilterSpec{}, Weighers: []v1alpha1.WeigherSpec{}, }, }, - setupPipelineConfigs: true, - createDecisions: true, - expectError: true, - expectResult: false, - expectCreatedDecision: true, - expectUpdatedStatus: false, - errorContains: "pipeline not found or not ready", + setupPipelineConfigs: true, + createHistory: true, + expectError: true, + expectResult: false, + expectHistoryCreated: true, + expectUpdatedStatus: false, + errorContains: "pipeline not found or not ready", }, } @@ -662,7 +665,7 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) client := fake.NewClientBuilder(). WithScheme(scheme). WithObjects(objects...). - WithStatusSubresource(&v1alpha1.Decision{}). + WithStatusSubresource(&v1alpha1.Decision{}, &v1alpha1.History{}). Build() controller := &FilterWeigherPipelineController{ @@ -670,6 +673,7 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Client: client, Pipelines: make(map[string]lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]), PipelineConfigs: make(map[string]v1alpha1.Pipeline), + HistoryManager: lib.HistoryClient{Client: client}, }, Monitor: lib.FilterWeigherPipelineMonitor{}, } @@ -707,24 +711,33 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) t.Errorf("Expected error to contain %q, got: %v", tt.errorContains, err) } - // Check if decision was created in the cluster when expected - if tt.expectCreatedDecision { - var createdDecision v1alpha1.Decision - key := types.NamespacedName{Name: tt.decision.Name, Namespace: tt.decision.Namespace} - err := client.Get(context.Background(), key, &createdDecision) - if err != nil { - t.Errorf("Expected decision to be created but got error: %v", err) + // Check if history CRD was created when expected + if tt.expectHistoryCreated { + var histories v1alpha1.HistoryList + deadline := time.Now().Add(2 * time.Second) + for { + if err := client.List(context.Background(), &histories); err != nil { + t.Fatalf("Failed to list histories: %v", err) + } + if len(histories.Items) > 0 { + break + } + if time.Now().After(deadline) { + t.Fatal("timed out waiting for history CRD to be created") + } + time.Sleep(5 * time.Millisecond) } } else { - var createdDecision v1alpha1.Decision - key := types.NamespacedName{Name: tt.decision.Name, Namespace: tt.decision.Namespace} - err := client.Get(context.Background(), key, &createdDecision) - if err == nil { - t.Error("Expected decision not to be created but it was found") + var histories v1alpha1.HistoryList + if err := client.List(context.Background(), &histories); err != nil { + t.Fatalf("Failed to list histories: %v", err) + } + if len(histories.Items) != 0 { + t.Error("Expected no history CRD but found one") } } - // Validate result and duration expectations + // Validate result expectations if tt.expectResult && tt.decision.Status.Result == nil { t.Error("Expected result to be set but was nil") } @@ -851,7 +864,7 @@ func TestFilterWeigherPipelineController_IgnorePreselection(t *testing.T) { Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, - CreateDecisions: false, + CreateHistory: false, IgnorePreselection: tt.ignorePreselection, Filters: []v1alpha1.FilterSpec{}, Weighers: []v1alpha1.WeigherSpec{}, diff --git a/internal/scheduling/nova/decisions_cleanup.go b/internal/scheduling/nova/history_cleanup.go similarity index 76% rename from internal/scheduling/nova/decisions_cleanup.go rename to internal/scheduling/nova/history_cleanup.go index 7414f8bc4..72f4d5583 100644 --- a/internal/scheduling/nova/decisions_cleanup.go +++ b/internal/scheduling/nova/history_cleanup.go @@ -19,15 +19,15 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) -type DecisionsCleanupConfig struct { +type HistoryCleanupConfig struct { // Secret ref to keystone credentials stored in a k8s secret. KeystoneSecretRef corev1.SecretReference `json:"keystoneSecretRef"` // Secret ref to SSO credentials stored in a k8s secret, if applicable. SSOSecretRef *corev1.SecretReference `json:"ssoSecretRef"` } -// Delete all decisions for nova servers that have been deleted. -func DecisionsCleanup(ctx context.Context, client client.Client, conf DecisionsCleanupConfig) error { +// Delete all histories for nova servers that have been deleted. +func HistoryCleanup(ctx context.Context, client client.Client, conf HistoryCleanupConfig) error { var authenticatedHTTP = http.DefaultClient if conf.SSOSecretRef != nil { var err error @@ -119,27 +119,27 @@ func DecisionsCleanup(ctx context.Context, client client.Client, conf DecisionsC reservationsByName[reservation.Name] = reservation } - // List all decisions and check if the server still exists. - decisionList := &v1alpha1.DecisionList{} - if err := client.List(ctx, decisionList); err != nil { + // List all histories and check if the server still exists. + historyList := &v1alpha1.HistoryList{} + if err := client.List(ctx, historyList); err != nil { return err } - for _, decision := range decisionList.Items { - // Skip non-nova decisions. - if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova { + for _, history := range historyList.Items { + // Skip non-nova histories. + if history.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova { continue } - // Skip decisions that are linked to existing reservations. - if _, ok := reservationsByName[decision.Spec.ResourceID]; ok { + // Skip histories that are linked to existing reservations. + if _, ok := reservationsByName[history.Spec.ResourceID]; ok { continue } - // Skip decisions for which the server still exists. - if _, ok := serversByID[decision.Spec.ResourceID]; ok { + // Skip histories for which the server still exists. + if _, ok := serversByID[history.Spec.ResourceID]; ok { continue } - // Delete the decision since the server no longer exists. - slog.Info("deleting decision for deleted server", "decision", decision.Name, "serverID", decision.Spec.ResourceID) - if err := client.Delete(ctx, &decision); err != nil { + // Delete the history since the server no longer exists. + slog.Info("deleting history for deleted server", "history", history.Name, "serverID", history.Spec.ResourceID) + if err := client.Delete(ctx, &history); err != nil { return err } } diff --git a/internal/scheduling/nova/decisions_cleanup_test.go b/internal/scheduling/nova/history_cleanup_test.go similarity index 80% rename from internal/scheduling/nova/decisions_cleanup_test.go rename to internal/scheduling/nova/history_cleanup_test.go index 9e84d8dbe..3123a8368 100644 --- a/internal/scheduling/nova/decisions_cleanup_test.go +++ b/internal/scheduling/nova/history_cleanup_test.go @@ -33,7 +33,7 @@ func TestCleanupNova(t *testing.T) { tests := []struct { name string - decisions []v1alpha1.Decision + histories []v1alpha1.History reservations []v1alpha1.Reservation mockServers []mockServer authError bool @@ -45,45 +45,45 @@ func TestCleanupNova(t *testing.T) { }{ { name: "authentication error", - decisions: []v1alpha1.Decision{}, + histories: []v1alpha1.History{}, authError: true, expectError: true, }, { name: "endpoint discovery error", - decisions: []v1alpha1.Decision{}, + histories: []v1alpha1.History{}, endpointError: true, expectError: true, }, { name: "nova server error", - decisions: []v1alpha1.Decision{}, + histories: []v1alpha1.History{}, serverError: true, expectError: true, }, { name: "no servers found", - decisions: []v1alpha1.Decision{}, + histories: []v1alpha1.History{}, emptyServers: true, expectError: false, }, { - name: "delete decisions for non-existent servers", - decisions: []v1alpha1.Decision{ + name: "delete histories for non-existent servers", + histories: []v1alpha1.History{ { ObjectMeta: metav1.ObjectMeta{ - Name: "decision-existing-server", + Name: "history-existing-server", }, - Spec: v1alpha1.DecisionSpec{ + Spec: v1alpha1.HistorySpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, ResourceID: "server-exists", }, }, { ObjectMeta: metav1.ObjectMeta{ - Name: "decision-deleted-server", + Name: "history-deleted-server", }, - Spec: v1alpha1.DecisionSpec{ + Spec: v1alpha1.HistorySpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, ResourceID: "server-deleted", }, @@ -92,26 +92,26 @@ func TestCleanupNova(t *testing.T) { mockServers: []mockServer{ {ID: "server-exists"}, }, - expectedDeleted: []string{"decision-deleted-server"}, + expectedDeleted: []string{"history-deleted-server"}, expectError: false, }, { - name: "keep decisions for existing servers", - decisions: []v1alpha1.Decision{ + name: "keep histories for existing servers", + histories: []v1alpha1.History{ { ObjectMeta: metav1.ObjectMeta{ - Name: "decision-server-1", + Name: "history-server-1", }, - Spec: v1alpha1.DecisionSpec{ + Spec: v1alpha1.HistorySpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, ResourceID: "server-1", }, }, { ObjectMeta: metav1.ObjectMeta{ - Name: "decision-server-2", + Name: "history-server-2", }, - Spec: v1alpha1.DecisionSpec{ + Spec: v1alpha1.HistorySpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, ResourceID: "server-2", }, @@ -125,22 +125,22 @@ func TestCleanupNova(t *testing.T) { expectError: false, }, { - name: "skip decisions with linked reservations", - decisions: []v1alpha1.Decision{ + name: "skip histories with linked reservations", + histories: []v1alpha1.History{ { ObjectMeta: metav1.ObjectMeta{ - Name: "decision-reserved-server", + Name: "history-reserved-server", }, - Spec: v1alpha1.DecisionSpec{ + Spec: v1alpha1.HistorySpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, ResourceID: "server-reserved", }, }, { ObjectMeta: metav1.ObjectMeta{ - Name: "decision-unreserved-server", + Name: "history-unreserved-server", }, - Spec: v1alpha1.DecisionSpec{ + Spec: v1alpha1.HistorySpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, ResourceID: "server-unreserved", }, @@ -154,26 +154,26 @@ func TestCleanupNova(t *testing.T) { }, }, mockServers: []mockServer{}, - expectedDeleted: []string{"decision-unreserved-server"}, + expectedDeleted: []string{"history-unreserved-server"}, expectError: false, }, { - name: "skip non-nova decisions", - decisions: []v1alpha1.Decision{ + name: "skip non-nova histories", + histories: []v1alpha1.History{ { ObjectMeta: metav1.ObjectMeta{ - Name: "decision-cinder", + Name: "history-cinder", }, - Spec: v1alpha1.DecisionSpec{ + Spec: v1alpha1.HistorySpec{ SchedulingDomain: v1alpha1.SchedulingDomainCinder, ResourceID: "volume-1", }, }, { ObjectMeta: metav1.ObjectMeta{ - Name: "decision-wrong-operator", + Name: "history-wrong-operator", }, - Spec: v1alpha1.DecisionSpec{ + Spec: v1alpha1.HistorySpec{ SchedulingDomain: "other-operator", ResourceID: "server-1", }, @@ -187,9 +187,9 @@ func TestCleanupNova(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - objects := make([]client.Object, 0, len(tt.decisions)+len(tt.reservations)) - for i := range tt.decisions { - objects = append(objects, &tt.decisions[i]) + objects := make([]client.Object, 0, len(tt.histories)+len(tt.reservations)) + for i := range tt.histories { + objects = append(objects, &tt.histories[i]) } for i := range tt.reservations { objects = append(objects, &tt.reservations[i]) @@ -334,13 +334,13 @@ func TestCleanupNova(t *testing.T) { WithScheme(scheme). WithObjects(objects...). Build() - config := DecisionsCleanupConfig{ + config := HistoryCleanupConfig{ KeystoneSecretRef: corev1.SecretReference{ Name: "keystone-secret", Namespace: "default", }, } - err := DecisionsCleanup(context.Background(), client, config) + err := HistoryCleanup(context.Background(), client, config) if tt.expectError && err == nil { t.Error("Expected error but got none") @@ -350,32 +350,32 @@ func TestCleanupNova(t *testing.T) { } if !tt.expectError { - // Verify expected decisions were deleted + // Verify expected histories were deleted for _, expectedDeleted := range tt.expectedDeleted { - var decision v1alpha1.Decision + var history v1alpha1.History err := client.Get(context.Background(), - types.NamespacedName{Name: expectedDeleted}, &decision) + types.NamespacedName{Name: expectedDeleted}, &history) if err == nil { - t.Errorf("Expected decision %s to be deleted but it still exists", expectedDeleted) + t.Errorf("Expected history %s to be deleted but it still exists", expectedDeleted) } } - // Verify other decisions still exist - for _, originalDecision := range tt.decisions { + // Verify other histories still exist + for _, originalHistory := range tt.histories { shouldBeDeleted := false for _, expectedDeleted := range tt.expectedDeleted { - if originalDecision.Name == expectedDeleted { + if originalHistory.Name == expectedDeleted { shouldBeDeleted = true break } } if !shouldBeDeleted { - var decision v1alpha1.Decision + var history v1alpha1.History err := client.Get(context.Background(), - types.NamespacedName{Name: originalDecision.Name}, &decision) + types.NamespacedName{Name: originalHistory.Name}, &history) if err != nil { - t.Errorf("Expected decision %s to still exist but got error: %v", - originalDecision.Name, err) + t.Errorf("Expected history %s to still exist but got error: %v", + originalHistory.Name, err) } } } @@ -384,7 +384,7 @@ func TestCleanupNova(t *testing.T) { } } -func TestCleanupNovaDecisionsCancel(t *testing.T) { +func TestCleanupNovaHistoriesCancel(t *testing.T) { scheme := runtime.NewScheme() if err := v1alpha1.AddToScheme(scheme); err != nil { t.Fatalf("Failed to add scheme: %v", err) @@ -415,7 +415,7 @@ func TestCleanupNovaDecisionsCancel(t *testing.T) { WithObjects(objects...). Build() - config := DecisionsCleanupConfig{ + config := HistoryCleanupConfig{ KeystoneSecretRef: corev1.SecretReference{ Name: "keystone-secret", Namespace: "default", @@ -426,7 +426,7 @@ func TestCleanupNovaDecisionsCancel(t *testing.T) { defer cancel() // This should exit quickly due to context cancellation - if err := DecisionsCleanup(ctx, client, config); err != nil { + if err := HistoryCleanup(ctx, client, config); err != nil { if !errors.Is(err, context.DeadlineExceeded) { t.Errorf("Unexpected error during cleanup: %v", err) } diff --git a/internal/scheduling/pods/filter_weigher_pipeline_controller.go b/internal/scheduling/pods/filter_weigher_pipeline_controller.go index 42b6d550b..ceba977b8 100644 --- a/internal/scheduling/pods/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/pods/filter_weigher_pipeline_controller.go @@ -83,7 +83,7 @@ func (c *FilterWeigherPipelineController) ProcessNewPod(ctx context.Context, pod }, Spec: v1alpha1.DecisionSpec{ SchedulingDomain: v1alpha1.SchedulingDomainPods, - ResourceID: pod.Name, + ResourceID: pod.Namespace + "--" + pod.Name, PipelineRef: corev1.ObjectReference{ Name: "pods-scheduler", }, @@ -91,6 +91,7 @@ func (c *FilterWeigherPipelineController) ProcessNewPod(ctx context.Context, pod Name: pod.Name, Namespace: pod.Namespace, }, + Intent: v1alpha1.SchedulingIntentUnknown, }, } @@ -98,12 +99,6 @@ func (c *FilterWeigherPipelineController) ProcessNewPod(ctx context.Context, pod if !ok { return fmt.Errorf("pipeline %s not configured", decision.Spec.PipelineRef.Name) } - if pipelineConf.Spec.CreateDecisions { - if err := c.Create(ctx, decision); err != nil { - return err - } - } - old := decision.DeepCopy() err := c.process(ctx, decision) if err != nil { meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ @@ -120,10 +115,9 @@ func (c *FilterWeigherPipelineController) ProcessNewPod(ctx context.Context, pod Message: "pipeline run succeeded", }) } - if pipelineConf.Spec.CreateDecisions { - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - return err + if pipelineConf.Spec.CreateHistory { + if upsertErr := c.HistoryManager.CreateOrUpdateHistory(ctx, decision, nil, err); upsertErr != nil { + ctrl.LoggerFrom(ctx).Error(upsertErr, "failed to create/update history") } } return err @@ -226,20 +220,12 @@ func (c *FilterWeigherPipelineController) handlePod() handler.EventHandler { } }, DeleteFunc: func(ctx context.Context, evt event.DeleteEvent, queue workqueue.TypedRateLimitingInterface[reconcile.Request]) { - // Delete the associated decision(s). - log := ctrl.LoggerFrom(ctx) + c.processMu.Lock() + defer c.processMu.Unlock() pod := evt.Object.(*corev1.Pod) - var decisions v1alpha1.DecisionList - if err := c.List(ctx, &decisions); err != nil { - log.Error(err, "failed to list decisions for deleted pod") - return - } - for _, decision := range decisions.Items { - if decision.Spec.PodRef.Name == pod.Name && decision.Spec.PodRef.Namespace == pod.Namespace { - if err := c.Delete(ctx, &decision); err != nil { - log.Error(err, "failed to delete decision for deleted pod") - } - } + if err := c.HistoryManager.Delete(ctx, v1alpha1.SchedulingDomainPods, pod.Namespace+"--"+pod.Name); err != nil { + log := ctrl.LoggerFrom(ctx) + log.Error(err, "failed to delete history CRD for pod", "pod", pod.Name) } }, } @@ -248,6 +234,7 @@ func (c *FilterWeigherPipelineController) handlePod() handler.EventHandler { func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { c.Initializer = c c.SchedulingDomain = v1alpha1.SchedulingDomainPods + c.HistoryManager = lib.HistoryClient{Client: mcl, Recorder: mgr.GetEventRecorder("cortex-pods-scheduler")} if err := mgr.Add(manager.RunnableFunc(c.InitAllPipelines)); err != nil { return err } diff --git a/internal/scheduling/pods/filter_weigher_pipeline_controller_test.go b/internal/scheduling/pods/filter_weigher_pipeline_controller_test.go index 3d523873b..143ed9f83 100644 --- a/internal/scheduling/pods/filter_weigher_pipeline_controller_test.go +++ b/internal/scheduling/pods/filter_weigher_pipeline_controller_test.go @@ -6,6 +6,7 @@ package pods import ( "context" "testing" + "time" "github.com/cobaltcore-dev/cortex/api/external/pods" "github.com/cobaltcore-dev/cortex/api/v1alpha1" @@ -263,15 +264,15 @@ func TestFilterWeigherPipelineController_ProcessNewPod(t *testing.T) { } tests := []struct { - name string - pod *corev1.Pod - nodes []corev1.Node - pipelineConfig *v1alpha1.Pipeline - createDecisions bool - expectError bool - expectDecisionCreated bool - expectNodeAssigned bool - expectTargetHost string + name string + pod *corev1.Pod + nodes []corev1.Node + pipelineConfig *v1alpha1.Pipeline + createHistory bool + expectError bool + expectHistoryCreated bool + expectNodeAssigned bool + expectTargetHost string }{ { name: "successful pod processing with decision creation", @@ -299,16 +300,16 @@ func TestFilterWeigherPipelineController_ProcessNewPod(t *testing.T) { Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainPods, - CreateDecisions: true, + CreateHistory: true, Filters: []v1alpha1.FilterSpec{}, Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: true, - expectError: false, - expectDecisionCreated: true, - expectNodeAssigned: true, - expectTargetHost: "node1", + createHistory: true, + expectError: false, + expectHistoryCreated: true, + expectNodeAssigned: true, + expectTargetHost: "node1", }, { name: "successful pod processing without decision creation", @@ -333,16 +334,16 @@ func TestFilterWeigherPipelineController_ProcessNewPod(t *testing.T) { Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainPods, - CreateDecisions: false, + CreateHistory: false, Filters: []v1alpha1.FilterSpec{}, Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: false, - expectError: false, - expectDecisionCreated: false, - expectNodeAssigned: true, - expectTargetHost: "node1", + createHistory: false, + expectError: false, + expectHistoryCreated: false, + expectNodeAssigned: true, + expectTargetHost: "node1", }, { name: "pipeline not configured", @@ -355,11 +356,11 @@ func TestFilterWeigherPipelineController_ProcessNewPod(t *testing.T) { SchedulerName: "", }, }, - nodes: []corev1.Node{}, - pipelineConfig: nil, - expectError: true, - expectDecisionCreated: false, - expectNodeAssigned: false, + nodes: []corev1.Node{}, + pipelineConfig: nil, + expectError: true, + expectHistoryCreated: false, + expectNodeAssigned: false, }, { name: "no nodes available", @@ -380,15 +381,15 @@ func TestFilterWeigherPipelineController_ProcessNewPod(t *testing.T) { Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainPods, - CreateDecisions: true, + CreateHistory: true, Filters: []v1alpha1.FilterSpec{}, Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: true, - expectError: true, - expectDecisionCreated: true, // Decision is created but processing fails - expectNodeAssigned: false, + createHistory: true, + expectError: true, + expectHistoryCreated: true, // Decision is created but processing fails + expectNodeAssigned: false, }, } @@ -405,13 +406,14 @@ func TestFilterWeigherPipelineController_ProcessNewPod(t *testing.T) { client := fake.NewClientBuilder(). WithScheme(scheme). WithRuntimeObjects(objects...). - WithStatusSubresource(&v1alpha1.Decision{}). + WithStatusSubresource(&v1alpha1.Decision{}, &v1alpha1.History{}). Build() controller := &FilterWeigherPipelineController{ BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[pods.PodPipelineRequest]]{ Pipelines: map[string]lib.FilterWeigherPipeline[pods.PodPipelineRequest]{}, PipelineConfigs: map[string]v1alpha1.Pipeline{}, + HistoryManager: lib.HistoryClient{Client: client}, }, Monitor: lib.FilterWeigherPipelineMonitor{}, } @@ -434,70 +436,29 @@ func TestFilterWeigherPipelineController_ProcessNewPod(t *testing.T) { return } - // Check if decision was created (if expected) - if tt.expectDecisionCreated { - var decisions v1alpha1.DecisionList - err := client.List(context.Background(), &decisions) - if err != nil { - t.Errorf("Failed to list decisions: %v", err) - return - } - - found := false - for _, decision := range decisions.Items { - if decision.Spec.PodRef != nil && - decision.Spec.PodRef.Name == tt.pod.Name && - decision.Spec.PodRef.Namespace == tt.pod.Namespace { - found = true - - // Verify decision properties - if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainPods { - t.Errorf("expected scheduling domain %q, got %q", v1alpha1.SchedulingDomainPods, decision.Spec.SchedulingDomain) - } - if decision.Spec.ResourceID != tt.pod.Name { - t.Errorf("expected resource ID %q, got %q", tt.pod.Name, decision.Spec.ResourceID) - } - if decision.Spec.PipelineRef.Name != "pods-scheduler" { - t.Errorf("expected pipeline ref %q, got %q", "pods-scheduler", decision.Spec.PipelineRef.Name) - } - - // Check if result was set (only for successful cases) - if !tt.expectError && tt.expectTargetHost != "" { - if decision.Status.Result == nil { - t.Error("expected decision result to be set") - return - } - if decision.Status.Result.TargetHost == nil { - t.Error("expected target host to be set") - return - } - if *decision.Status.Result.TargetHost != tt.expectTargetHost { - t.Errorf("expected target host %q, got %q", tt.expectTargetHost, *decision.Status.Result.TargetHost) - } - } + // Check if history CRD was created when expected + if tt.expectHistoryCreated { + var histories v1alpha1.HistoryList + deadline := time.Now().Add(2 * time.Second) + for { + if err := client.List(context.Background(), &histories); err != nil { + t.Fatalf("Failed to list histories: %v", err) + } + if len(histories.Items) > 0 { break } - } - - if !found { - t.Error("expected decision to be created but was not found") + if time.Now().After(deadline) { + t.Fatal("timed out waiting for history CRD to be created") + } + time.Sleep(5 * time.Millisecond) } } else { - // Check that no decisions were created - var decisions v1alpha1.DecisionList - err := client.List(context.Background(), &decisions) - if err != nil { - t.Errorf("Failed to list decisions: %v", err) - return + var histories v1alpha1.HistoryList + if err := client.List(context.Background(), &histories); err != nil { + t.Fatalf("Failed to list histories: %v", err) } - - for _, decision := range decisions.Items { - if decision.Spec.PodRef != nil && - decision.Spec.PodRef.Name == tt.pod.Name && - decision.Spec.PodRef.Namespace == tt.pod.Namespace { - t.Error("expected no decision to be created but found one") - break - } + if len(histories.Items) != 0 { + t.Error("Expected no history CRD but found one") } } diff --git a/tools/plutono/provisioning/dashboards/cortex-status.json b/tools/plutono/provisioning/dashboards/cortex-status.json index 2481aded0..f83e2926b 100644 --- a/tools/plutono/provisioning/dashboards/cortex-status.json +++ b/tools/plutono/provisioning/dashboards/cortex-status.json @@ -16,7 +16,7 @@ "editable": true, "gnetId": null, "graphTooltip": 0, - "id": 1, + "id": 3, "links": [], "panels": [ { @@ -276,56 +276,7 @@ }, "unit": "none" }, - "overrides": [ - { - "matcher": { - "id": "byFrameRefID", - "options": "B" - }, - "properties": [ - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "rgb(71, 71, 71)", - "value": null - }, - { - "color": "red", - "value": 1 - } - ] - } - } - ] - }, - { - "matcher": { - "id": "byFrameRefID", - "options": "C" - }, - "properties": [ - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "rgb(71, 71, 71)", - "value": null - }, - { - "color": "red", - "value": 10 - } - ] - } - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 8, @@ -333,89 +284,6 @@ "x": 12, "y": 9 }, - "id": 55, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": true - }, - "text": {}, - "textMode": "value_and_name" - }, - "pluginVersion": "7.5.37", - "targets": [ - { - "exemplar": true, - "expr": "cortex_decision_state{state=\"success\"}", - "format": "time_series", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "{{state}}", - "refId": "A" - }, - { - "exemplar": true, - "expr": "cortex_decision_state{state!=\"success\",state!=\"waiting\"}", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "{{state}}", - "refId": "B" - }, - { - "exemplar": true, - "expr": "cortex_decision_state{state!=\"success\",state=\"waiting\"}", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "{{state}}", - "refId": "C" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Decision status", - "type": "stat" - }, - { - "datasource": "prometheus-openstack", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "rgb(66, 66, 66)", - "value": 1 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 17 - }, "id": 54, "options": { "colorMode": "background", @@ -466,7 +334,7 @@ "h": 1, "w": 24, "x": 0, - "y": 25 + "y": 17 }, "id": 49, "panels": [], @@ -483,7 +351,7 @@ "h": 1, "w": 12, "x": 0, - "y": 26 + "y": 18 }, "id": 41, "options": { @@ -512,7 +380,7 @@ "h": 1, "w": 12, "x": 12, - "y": 26 + "y": 18 }, "id": 43, "options": { @@ -556,7 +424,7 @@ "h": 12, "w": 12, "x": 0, - "y": 27 + "y": 19 }, "heatmap": {}, "hideZeroBuckets": false, @@ -632,7 +500,7 @@ "h": 12, "w": 12, "x": 12, - "y": 27 + "y": 19 }, "heatmap": {}, "hideZeroBuckets": false, @@ -701,7 +569,7 @@ "h": 12, "w": 24, "x": 0, - "y": 39 + "y": 31 }, "hiddenSeries": false, "id": 39, @@ -801,7 +669,7 @@ "h": 11, "w": 6, "x": 0, - "y": 51 + "y": 43 }, "hiddenSeries": false, "id": 31, @@ -898,7 +766,7 @@ "h": 11, "w": 6, "x": 6, - "y": 51 + "y": 43 }, "hiddenSeries": false, "id": 33, @@ -1010,7 +878,7 @@ "h": 11, "w": 6, "x": 12, - "y": 51 + "y": 43 }, "hiddenSeries": false, "id": 35, @@ -1122,7 +990,7 @@ "h": 11, "w": 6, "x": 18, - "y": 51 + "y": 43 }, "hiddenSeries": false, "id": 37, @@ -1232,7 +1100,7 @@ "h": 12, "w": 12, "x": 0, - "y": 62 + "y": 54 }, "hiddenSeries": false, "id": 27, @@ -1340,7 +1208,7 @@ "h": 12, "w": 12, "x": 12, - "y": 62 + "y": 54 }, "hiddenSeries": false, "id": 29, @@ -1428,7 +1296,7 @@ "h": 1, "w": 24, "x": 0, - "y": 74 + "y": 66 }, "id": 5, "panels": [], @@ -1453,7 +1321,7 @@ "h": 11, "w": 12, "x": 0, - "y": 75 + "y": 67 }, "hiddenSeries": false, "id": 2, @@ -1573,7 +1441,7 @@ "h": 11, "w": 12, "x": 12, - "y": 75 + "y": 67 }, "hiddenSeries": false, "id": 3, @@ -1712,7 +1580,7 @@ "h": 12, "w": 24, "x": 0, - "y": 86 + "y": 78 }, "id": 50, "options": { @@ -1753,7 +1621,7 @@ "h": 1, "w": 24, "x": 0, - "y": 98 + "y": 90 }, "id": 25, "panels": [], @@ -1776,7 +1644,7 @@ "h": 14, "w": 12, "x": 0, - "y": 99 + "y": 91 }, "hiddenSeries": false, "id": 21, @@ -1878,7 +1746,7 @@ "h": 14, "w": 12, "x": 12, - "y": 99 + "y": 91 }, "hiddenSeries": false, "id": 23, @@ -1971,7 +1839,7 @@ "h": 1, "w": 24, "x": 0, - "y": 113 + "y": 105 }, "id": 19, "panels": [], @@ -1994,7 +1862,7 @@ "h": 13, "w": 12, "x": 0, - "y": 114 + "y": 106 }, "hiddenSeries": false, "id": 17, @@ -2092,7 +1960,7 @@ "h": 13, "w": 12, "x": 12, - "y": 114 + "y": 106 }, "hiddenSeries": false, "id": 15, @@ -2189,7 +2057,7 @@ "h": 12, "w": 12, "x": 0, - "y": 127 + "y": 119 }, "hiddenSeries": false, "id": 11, @@ -2287,7 +2155,7 @@ "h": 12, "w": 12, "x": 12, - "y": 127 + "y": 119 }, "hiddenSeries": false, "id": 13,