diff --git a/changelogs/current.yaml b/changelogs/current.yaml index f79775e9d26fd..e18881f8d2f1f 100644 --- a/changelogs/current.yaml +++ b/changelogs/current.yaml @@ -37,6 +37,11 @@ behavior_changes: minor_behavior_changes: # *Changes that may cause incompatibilities for some users, but should not for most* +- area: load_balancing + change: | + Changed the default behavior of ``OrcaWeightManager`` to prefer named metrics over application + utilization when computing utilization. This behavior can be reverted by setting the runtime guard + ``envoy.reloadable_features.orca_weight_manager_use_named_metrics_first`` to ``false``. - area: compressor change: | Strong ``ETag`` removal when compressing now uses the same weak ``W/`` check as diff --git a/source/common/runtime/runtime_features.cc b/source/common/runtime/runtime_features.cc index 4bf81224c95fc..86c9c3a80e006 100644 --- a/source/common/runtime/runtime_features.cc +++ b/source/common/runtime/runtime_features.cc @@ -73,6 +73,7 @@ RUNTIME_GUARD(envoy_reloadable_features_oauth2_encrypt_tokens); RUNTIME_GUARD(envoy_reloadable_features_odcds_over_ads_fix); RUNTIME_GUARD(envoy_reloadable_features_on_demand_cluster_no_recreate_stream); RUNTIME_GUARD(envoy_reloadable_features_on_demand_track_end_stream); +RUNTIME_GUARD(envoy_reloadable_features_orca_weight_manager_use_named_metrics_first); RUNTIME_GUARD(envoy_reloadable_features_original_dst_rely_on_idle_timeout); RUNTIME_GUARD(envoy_reloadable_features_prefix_map_matcher_resume_after_subtree_miss); RUNTIME_GUARD(envoy_reloadable_features_proxy_protocol_allow_duplicate_tlvs); diff --git a/source/extensions/load_balancing_policies/common/orca_weight_manager.cc b/source/extensions/load_balancing_policies/common/orca_weight_manager.cc index 2b0c78e13daf2..6459abd074d8e 100644 --- a/source/extensions/load_balancing_policies/common/orca_weight_manager.cc +++ b/source/extensions/load_balancing_policies/common/orca_weight_manager.cc @@ -9,6 +9,7 @@ #include "envoy/upstream/upstream.h" #include "source/common/orca/orca_load_metrics.h" +#include "source/common/runtime/runtime_features.h" #include "absl/status/status.h" #include "xds/data/orca/v3/orca_load_report.pb.h" @@ -35,18 +36,41 @@ OrcaLoadReportHandler::OrcaLoadReportHandler(const OrcaWeightManagerConfig& conf double OrcaLoadReportHandler::getUtilizationFromOrcaReport( const OrcaLoadReportProto& orca_load_report, const std::vector& metric_names_for_computing_utilization) { - // If application_utilization is valid, use it as the utilization metric. - double utilization = orca_load_report.application_utilization(); - if (utilization > 0) { - return utilization; - } - // Otherwise, find the most constrained utilization metric. - utilization = - Envoy::Orca::getMaxUtilization(metric_names_for_computing_utilization, orca_load_report); - if (utilization > 0) { - return utilization; + if (Runtime::runtimeFeatureEnabled( + "envoy.reloadable_features.orca_weight_manager_use_named_metrics_first")) { + // By default (with runtime feature enabled) we prefer named metrics over application + // utilization, so if named metrics are not ignored if application utilization is + // available and set. See https://github.com/envoyproxy/envoy/pull/44196 for discussion. + + // Find the most constrained utilization metric in `metric_names_for_computing_utilization`. + double utilization = + Envoy::Orca::getMaxUtilization(metric_names_for_computing_utilization, orca_load_report); + if (utilization > 0) { + return utilization; + } + // If named metrics are not available, use `application_utilization` as the utilization metric. + utilization = orca_load_report.application_utilization(); + if (utilization > 0) { + return utilization; + } + } else { + // With the runtime flag disabled, we use application utilization if it is set over named + // metrics. This means that metric_names_for_computing_utilization is ignored if + // application_utilization is available and set. + + // If application_utilization is valid, use it as the utilization metric. + double utilization = orca_load_report.application_utilization(); + if (utilization > 0) { + return utilization; + } + // Otherwise, find the most constrained utilization metric. + utilization = + Envoy::Orca::getMaxUtilization(metric_names_for_computing_utilization, orca_load_report); + if (utilization > 0) { + return utilization; + } } - // If utilization is <= 0, use cpu_utilization. + // If utilization is <= 0, use `cpu_utilization` as the utilization metric. return orca_load_report.cpu_utilization(); } diff --git a/test/extensions/load_balancing_policies/client_side_weighted_round_robin/BUILD b/test/extensions/load_balancing_policies/client_side_weighted_round_robin/BUILD index bb97d790dd4fc..3fe95cd238e71 100644 --- a/test/extensions/load_balancing_policies/client_side_weighted_round_robin/BUILD +++ b/test/extensions/load_balancing_policies/client_side_weighted_round_robin/BUILD @@ -36,6 +36,7 @@ envoy_extension_cc_test( "//source/extensions/load_balancing_policies/common:orca_weight_manager_lib", "//test/extensions/load_balancing_policies/common:load_balancer_base_test_lib", "//test/mocks/stream_info:stream_info_mocks", + "//test/test_common:test_runtime_lib", "//test/test_common:utility_lib", ], ) diff --git a/test/extensions/load_balancing_policies/client_side_weighted_round_robin/client_side_weighted_round_robin_lb_test.cc b/test/extensions/load_balancing_policies/client_side_weighted_round_robin/client_side_weighted_round_robin_lb_test.cc index 99786a70eebab..4595a1b5be2f1 100644 --- a/test/extensions/load_balancing_policies/client_side_weighted_round_robin/client_side_weighted_round_robin_lb_test.cc +++ b/test/extensions/load_balancing_policies/client_side_weighted_round_robin/client_side_weighted_round_robin_lb_test.cc @@ -9,6 +9,7 @@ #include "test/extensions/load_balancing_policies/common/load_balancer_impl_base_test.h" #include "test/mocks/stream_info/mocks.h" +#include "test/test_common/test_runtime.h" #include "gmock/gmock.h" @@ -569,7 +570,6 @@ TEST(ClientSideWeightedRoundRobinLoadBalancerTest, GetUtilizationFromOrcaReport_ApplicationUtilization) { xds::data::orca::v3::OrcaLoadReport orca_load_report; orca_load_report.set_application_utilization(0.5); - orca_load_report.mutable_named_metrics()->insert({"foo", 0.3}); orca_load_report.set_cpu_utilization(0.6); EXPECT_EQ(ClientSideWeightedRoundRobinLoadBalancerFriend::getUtilizationFromOrcaReport( orca_load_report, {"named_metrics.foo"}), @@ -585,6 +585,26 @@ TEST(ClientSideWeightedRoundRobinLoadBalancerTest, GetUtilizationFromOrcaReport_ 0.3); } +TEST(ClientSideWeightedRoundRobinLoadBalancerTest, GetUtilizationFromOrcaReport_Preference) { + xds::data::orca::v3::OrcaLoadReport orca_load_report; + orca_load_report.set_application_utilization(0.5); + orca_load_report.mutable_named_metrics()->insert({"foo", 0.3}); + orca_load_report.set_cpu_utilization(0.6); + + // By default (flag enabled), named metrics win. + EXPECT_EQ(ClientSideWeightedRoundRobinLoadBalancerFriend::getUtilizationFromOrcaReport( + orca_load_report, {"named_metrics.foo"}), + 0.3); + + // With flag disabled, application utilization wins. + TestScopedRuntime scoped_runtime; + scoped_runtime.mergeValues( + {{"envoy.reloadable_features.orca_weight_manager_use_named_metrics_first", "false"}}); + EXPECT_EQ(ClientSideWeightedRoundRobinLoadBalancerFriend::getUtilizationFromOrcaReport( + orca_load_report, {"named_metrics.foo"}), + 0.5); +} + TEST(ClientSideWeightedRoundRobinLoadBalancerTest, GetUtilizationFromOrcaReport_CpuUtilization) { xds::data::orca::v3::OrcaLoadReport orca_load_report; orca_load_report.mutable_named_metrics()->insert({"bar", 0.3}); diff --git a/test/extensions/load_balancing_policies/common/BUILD b/test/extensions/load_balancing_policies/common/BUILD index edd44f08f7c80..a31541bb6c863 100644 --- a/test/extensions/load_balancing_policies/common/BUILD +++ b/test/extensions/load_balancing_policies/common/BUILD @@ -117,6 +117,7 @@ envoy_cc_test( "//test/mocks/upstream:host_mocks", "//test/mocks/upstream:priority_set_mocks", "//test/test_common:simulated_time_system_lib", + "//test/test_common:test_runtime_lib", "@xds//xds/data/orca/v3:pkg_cc_proto", ], ) diff --git a/test/extensions/load_balancing_policies/common/orca_weight_manager_test.cc b/test/extensions/load_balancing_policies/common/orca_weight_manager_test.cc index c3d1a877cf8c1..98571389526f8 100644 --- a/test/extensions/load_balancing_policies/common/orca_weight_manager_test.cc +++ b/test/extensions/load_balancing_policies/common/orca_weight_manager_test.cc @@ -11,6 +11,7 @@ #include "test/mocks/upstream/host.h" #include "test/mocks/upstream/priority_set.h" #include "test/test_common/simulated_time_system.h" +#include "test/test_common/test_runtime.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -32,7 +33,6 @@ using ::testing::Return; TEST(OrcaLoadReportHandlerTest, GetUtilizationFromOrcaReport_ApplicationUtilization) { xds::data::orca::v3::OrcaLoadReport report; report.set_application_utilization(0.5); - report.mutable_named_metrics()->insert({"foo", 0.3}); report.set_cpu_utilization(0.6); EXPECT_EQ(OrcaLoadReportHandler::getUtilizationFromOrcaReport(report, {"named_metrics.foo"}), 0.5); @@ -46,6 +46,53 @@ TEST(OrcaLoadReportHandlerTest, GetUtilizationFromOrcaReport_NamedMetrics) { 0.3); } +TEST(OrcaLoadReportHandlerTest, GetUtilizationFromOrcaReport_Preference) { + xds::data::orca::v3::OrcaLoadReport report; + report.set_application_utilization(0.5); + report.mutable_named_metrics()->insert({"foo", 0.3}); + report.set_cpu_utilization(0.6); + + // By default (flag enabled), named metrics win. + EXPECT_EQ(OrcaLoadReportHandler::getUtilizationFromOrcaReport(report, {"named_metrics.foo"}), + 0.3); + + // With flag disabled, application utilization wins. + TestScopedRuntime scoped_runtime; + scoped_runtime.mergeValues( + {{"envoy.reloadable_features.orca_weight_manager_use_named_metrics_first", "false"}}); + EXPECT_EQ(OrcaLoadReportHandler::getUtilizationFromOrcaReport(report, {"named_metrics.foo"}), + 0.5); +} + +TEST(OrcaLoadReportHandlerTest, + GetUtilizationFromOrcaReport_Preference_FlagDisabled_FallbackToNamedMetrics) { + xds::data::orca::v3::OrcaLoadReport report; + report.mutable_named_metrics()->insert({"foo", 0.3}); + report.set_cpu_utilization(0.6); + + TestScopedRuntime scoped_runtime; + scoped_runtime.mergeValues( + {{"envoy.reloadable_features.orca_weight_manager_use_named_metrics_first", "false"}}); + + // application_utilization is not set (0), so it falls back to named metrics. + EXPECT_EQ(OrcaLoadReportHandler::getUtilizationFromOrcaReport(report, {"named_metrics.foo"}), + 0.3); +} + +TEST(OrcaLoadReportHandlerTest, + GetUtilizationFromOrcaReport_Preference_FlagDisabled_FallbackToCpu) { + xds::data::orca::v3::OrcaLoadReport report; + report.set_cpu_utilization(0.6); + + TestScopedRuntime scoped_runtime; + scoped_runtime.mergeValues( + {{"envoy.reloadable_features.orca_weight_manager_use_named_metrics_first", "false"}}); + + // application_utilization and named metrics are not set, so it falls back to cpu. + EXPECT_EQ(OrcaLoadReportHandler::getUtilizationFromOrcaReport(report, {"named_metrics.foo"}), + 0.6); +} + TEST(OrcaLoadReportHandlerTest, GetUtilizationFromOrcaReport_CpuUtilizationFallback) { xds::data::orca::v3::OrcaLoadReport report; report.mutable_named_metrics()->insert({"bar", 0.3});