From d7591344befefb6d7650b97f2e0b262f02ae1ead Mon Sep 17 00:00:00 2001 From: Bharadwaj Anupindi Date: Thu, 9 Jan 2025 17:29:03 +0900 Subject: [PATCH 1/5] Introduce multiple service group categories and maximumMaxReplicas for each of them --- api/autoscaling/v2/webhook_suite_test.go | 2 +- cmd/main.go | 4 +- .../controller/tortoise_controller_test.go | 15 ++- pkg/config/config.go | 122 ++++++++++++++++-- pkg/config/config_test.go | 105 ++++++++++----- pkg/hpa/service.go | 58 ++++++++- pkg/hpa/service_test.go | 43 +++++- 7 files changed, 292 insertions(+), 57 deletions(-) diff --git a/api/autoscaling/v2/webhook_suite_test.go b/api/autoscaling/v2/webhook_suite_test.go index 8115e02e..4beb6773 100644 --- a/api/autoscaling/v2/webhook_suite_test.go +++ b/api/autoscaling/v2/webhook_suite_test.go @@ -168,7 +168,7 @@ var _ = BeforeSuite(func() { eventRecorder := mgr.GetEventRecorderFor("tortoise-controller") tortoiseService, err := tortoise.New(mgr.GetClient(), eventRecorder, config.RangeOfMinMaxReplicasRecommendationHours, config.TimeZone, config.TortoiseUpdateInterval, config.GatheringDataPeriodType) Expect(err).NotTo(HaveOccurred()) - hpaService, err := hpa.New(mgr.GetClient(), eventRecorder, config.ReplicaReductionFactor, config.MaximumTargetResourceUtilization, 100, time.Hour, 1000, 10000, 3, "") + hpaService, err := hpa.New(mgr.GetClient(), eventRecorder, config.ReplicaReductionFactor, config.MaximumTargetResourceUtilization, 100, time.Hour, 1000, 3, "", config) Expect(err).NotTo(HaveOccurred()) hpaWebhook := New(tortoiseService, hpaService) diff --git a/cmd/main.go b/cmd/main.go index c5124b24..49ac7e96 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -152,7 +152,7 @@ func main() { os.Exit(1) } - hpaService, err := hpa.New(mgr.GetClient(), eventRecorder, config.ReplicaReductionFactor, config.MaximumTargetResourceUtilization, config.HPATargetUtilizationMaxIncrease, config.HPATargetUtilizationUpdateInterval, config.MaximumMinReplicas, config.MaximumMaxReplicas, int32(config.MinimumMinReplicas), config.HPAExternalMetricExclusionRegex) + hpaService, err := hpa.New(mgr.GetClient(), eventRecorder, config.ReplicaReductionFactor, config.MaximumTargetResourceUtilization, config.HPATargetUtilizationMaxIncrease, config.HPATargetUtilizationUpdateInterval, config.MaximumMinReplicas, int32(config.MinimumMinReplicas), config.HPAExternalMetricExclusionRegex, config) if err != nil { setupLog.Error(err, "unable to start hpa service") os.Exit(1) @@ -176,7 +176,7 @@ func main() { config.MinimumMemoryRequestPerContainer, config.MaximumCPURequest, config.MaximumMemoryRequest, - config.MaximumMaxReplicas, + config.GetDefaultMaximumMaxReplica(), config.MaxAllowedScalingDownRatio, config.BufferRatioOnVerticalResource, config.FeatureFlags, diff --git a/internal/controller/tortoise_controller_test.go b/internal/controller/tortoise_controller_test.go index 2f7f01e4..8708f8dc 100644 --- a/internal/controller/tortoise_controller_test.go +++ b/internal/controller/tortoise_controller_test.go @@ -24,6 +24,7 @@ import ( "sigs.k8s.io/yaml" "github.com/mercari/tortoise/api/v1beta3" + configfile "github.com/mercari/tortoise/pkg/config" "github.com/mercari/tortoise/pkg/deployment" "github.com/mercari/tortoise/pkg/features" "github.com/mercari/tortoise/pkg/hpa" @@ -259,7 +260,19 @@ func startController(ctx context.Context) func() { Expect(err).ShouldNot(HaveOccurred()) cli, err := vpa.New(mgr.GetConfig(), recorder) Expect(err).ShouldNot(HaveOccurred()) - hpaS, err := hpa.New(mgr.GetClient(), recorder, 0.95, 90, 25, time.Hour, 1000, 10000, 3, ".*-exclude-metric") + + // Define a dummy config with maximumMaxReplica set to 10000 for the default group + defaultGroupName := "default" + dummyConfig := &configfile.Config{ + MaximumMaxReplicas: []configfile.MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: &defaultGroupName, + MaximumMaxReplica: 10000, // Set the value you need + }, + }, + // Add other default values if your function logic depends on them + } + hpaS, err := hpa.New(mgr.GetClient(), recorder, 0.95, 90, 25, time.Hour, 1000, 3, ".*-exclude-metric", dummyConfig) Expect(err).ShouldNot(HaveOccurred()) reconciler := &TortoiseReconciler{ Scheme: scheme, diff --git a/pkg/config/config.go b/pkg/config/config.go index 7dbf7ae8..891c9be4 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -2,10 +2,12 @@ package config import ( "fmt" + "math" "os" "time" "gopkg.in/yaml.v3" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/mercari/tortoise/pkg/features" ) @@ -166,11 +168,6 @@ type Config struct { // a tortoise will ignore `PreferredMaxReplicas`, and increase the number of replicas. // This feature is controlled by the feature flag `VerticalScalingBasedOnPreferredMaxReplicas`. PreferredMaxReplicas int `yaml:"PreferredMaxReplicas"` - // MaximumMaxReplicas is the maximum maxReplica that tortoise can give to the HPA (default: 100) - // Note that this is very dangerous. If you set this value too low, the HPA may not be able to scale up the workload. - // The motivation is to use it has a hard limit to prevent the HPA from scaling up the workload too much in cases of Tortoise's bug, abnormal traffic increase, etc. - // If some Tortoise hits this limit, the tortoise controller emits an error log, which may or may not imply you have to change this value. - MaximumMaxReplicas int32 `yaml:"MaximumMaxReplicas"` // MaximumCPURequest is the maximum CPU cores that the tortoise can give to the container resource request (default: 10) MaximumCPURequest string `yaml:"MaximumCPURequest"` // MaximumMemoryRequest is the maximum memory bytes that the tortoise can give to the container resource request (default: 10Gi) @@ -261,12 +258,43 @@ type Config struct { // IstioSidecarProxyDefaultMemory is the default Memory resource request of the istio sidecar proxy (default: 200Mi) IstioSidecarProxyDefaultMemory string `yaml:"IstioSidecarProxyDefaultMemory"` + // serviceGroups defines a list of service category names. + ServiceGroups []ServiceGroup `yaml:"ServiceGroups"` + // MaximumMaxReplicas is the maximum maxReplicas that tortoise can give to the HPA per group (default: 100) + // Note that this is very dangerous. If you set this value too low, the HPA may not be able to scale up the workload. + // The motivation is to use it has a hard limit to prevent the HPA from scaling up the workload too much in cases of Tortoise's bug, abnormal traffic increase, etc. + // If some Tortoise hits this limit, the tortoise controller emits an error log, which may or may not imply you have to change this value. + MaximumMaxReplicas []MaximumMaxReplicasPerGroup `yaml:"MaximumMaxReplicas"` + // FeatureFlags is the list of feature flags (default: empty = all alpha features are disabled) // See the list of feature flags in features.go FeatureFlags []features.FeatureFlag `yaml:"FeatureFlags"` } +type MaximumMaxReplicasPerGroup struct { + // ServiceGroupName refers to one ServiceGroup at Config.ServiceGroups + // If nil, this MaximumMaxReplica would apply to all services. + ServiceGroupName *string `yaml:"ServiceGroupName"` + + MaximumMaxReplica int32 `yaml:"MaximumMaxReplica"` +} + +// Namespace represents a Kubernetes namespace and its associated label selectors. +type Namespace struct { + Name string `yaml:"name"` // Namespace name + LabelSelectors []*metav1.LabelSelector `yaml:"labelSelectors"` // Slice of label selectors within this namespace +} + +// ServiceGroup represents a collection of services grouped together with namespace awareness. +type ServiceGroup struct { + // Name is the group's name (e.g., big-service, fintech-service, etc). + Name string `yaml:"name"` + // Namespaces represent multiple namespaces with their label selectors. + Namespaces []Namespace `yaml:"namespaces"` // A slice of Namespace structs +} + func defaultConfig() *Config { + defaultGroupName := "default" return &Config{ RangeOfMinMaxReplicasRecommendationHours: 1, GatheringDataPeriodType: "weekly", @@ -288,13 +316,22 @@ func defaultConfig() *Config { HPATargetUtilizationMaxIncrease: 5, HPATargetUtilizationUpdateInterval: time.Hour * 24, MaximumMinReplicas: 10, - MaximumMaxReplicas: 100, MaxAllowedScalingDownRatio: 0.8, IstioSidecarProxyDefaultCPU: "100m", IstioSidecarProxyDefaultMemory: "200Mi", MinimumCPULimit: "0", ResourceLimitMultiplier: map[string]int64{}, - BufferRatioOnVerticalResource: 0.1, + ServiceGroups: []ServiceGroup{ + // This is an empty slice, indicating that no service groups are defined by default. + }, + MaximumMaxReplicas: []MaximumMaxReplicasPerGroup{ + // This is the default maximum maxReplicas limit for all services. + { + ServiceGroupName: &defaultGroupName, // Applies to all services by default. + MaximumMaxReplica: 100, // Default max replica limit. + }, + }, + BufferRatioOnVerticalResource: 0.1, } } @@ -322,6 +359,16 @@ func ParseConfig(path string) (*Config, error) { return config, nil } +// GetDefaultMaxReplica returns the default maximum max replicas from the configuration. +func (cfg *Config) GetDefaultMaximumMaxReplica() int32 { + for _, maxReplicaGroup := range cfg.MaximumMaxReplicas { + if maxReplicaGroup.ServiceGroupName != nil && *maxReplicaGroup.ServiceGroupName == "default" { + return maxReplicaGroup.MaximumMaxReplica + } + } + return 100 // Choose a last resort default value if "default" is not found +} + func validate(config *Config) error { if config.RangeOfMinMaxReplicasRecommendationHours > 24 || config.RangeOfMinMaxReplicasRecommendationHours < 1 { return fmt.Errorf("RangeOfMinMaxReplicasRecommendationHours should be between 1 and 24") @@ -339,10 +386,67 @@ func validate(config *Config) error { if config.MinimumMinReplicas >= int(config.MaximumMinReplicas) { return fmt.Errorf("MinimumMinReplicas should be less than MaximumMinReplicas") } - if config.MaximumMinReplicas > config.MaximumMaxReplicas { + + // Check that there is at least one MaximumMaxReplicas + if len(config.MaximumMaxReplicas) == 0 { + return fmt.Errorf("MaximumMaxReplicas must have at least one configuration entry") + } + + // Find the minimum value of MaximumMaxReplicas across all service groups + minOfMaximumMaxReplicas := int32(math.MaxInt32) // Start with the largest possible int32 value + var defaultFound bool + for _, group := range config.MaximumMaxReplicas { + if group.MaximumMaxReplica < minOfMaximumMaxReplicas { + minOfMaximumMaxReplicas = group.MaximumMaxReplica + } + // Check for "default" entry + if group.ServiceGroupName != nil && *group.ServiceGroupName == "default" { + defaultFound = true + } + } + + // Ensure that there is an entry with "default" for MaximumMaxReplicas + if !defaultFound { + return fmt.Errorf("There must be at least one MaximumMaxReplicas entry with ServiceGroupName set to \"default\"") + } + + // Check for non-negative values + if minOfMaximumMaxReplicas < 0 { + return fmt.Errorf("MaximumMaxReplicas should contain non-negative values") + } + + // Ensure ServiceGroupNames in MaximumMaxReplicas match defined ServiceGroups + serviceGroupMap := make(map[string]bool) + for _, sg := range config.ServiceGroups { + serviceGroupMap[sg.Name] = true + } + + for _, maxReplicas := range config.MaximumMaxReplicas { + if maxReplicas.ServiceGroupName != nil { + // Allow "default" to bypass the check of matching ServiceGroups. + if *maxReplicas.ServiceGroupName == "default" { + continue + } + // If not default, ensure it exists in the serviceGroupMap. + if _, exists := serviceGroupMap[*maxReplicas.ServiceGroupName]; !exists { + return fmt.Errorf("ServiceGroupName %s in MaximumMaxReplicas is not defined in ServiceGroups", *maxReplicas.ServiceGroupName) + } + } + } + + // Ensure no duplicates in ServiceGroups + seenServiceGroups := make(map[string]bool) + for _, sg := range config.ServiceGroups { + if seenServiceGroups[sg.Name] { + return fmt.Errorf("Duplicate ServiceGroupName found: %s", sg.Name) + } + seenServiceGroups[sg.Name] = true + } + + if config.MaximumMinReplicas > minOfMaximumMaxReplicas { return fmt.Errorf("MaximumMinReplicas should be less than or equal to MaximumMaxReplicas") } - if config.PreferredMaxReplicas >= int(config.MaximumMaxReplicas) { + if config.PreferredMaxReplicas >= int(minOfMaximumMaxReplicas) { return fmt.Errorf("PreferredMaxReplicas should be less than MaximumMaxReplicas") } if config.PreferredMaxReplicas <= config.MinimumMinReplicas { diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 5d4f0e39..3f804a3c 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -7,6 +7,7 @@ import ( ) func TestParseConfig(t *testing.T) { + defaultGroupName := "default" type args struct { path string } @@ -40,11 +41,17 @@ func TestParseConfig(t *testing.T) { TortoiseUpdateInterval: 1 * time.Hour, HPATargetUtilizationMaxIncrease: 10, MaximumMinReplicas: 10, - MaximumMaxReplicas: 100, - HPATargetUtilizationUpdateInterval: 3 * time.Hour, - IstioSidecarProxyDefaultCPU: "100m", - IstioSidecarProxyDefaultMemory: "200Mi", - MaxAllowedScalingDownRatio: 0.5, + ServiceGroups: []ServiceGroup{}, + MaximumMaxReplicas: []MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: &defaultGroupName, + MaximumMaxReplica: 100, + }, + }, + HPATargetUtilizationUpdateInterval: 3 * time.Hour, + IstioSidecarProxyDefaultCPU: "100m", + IstioSidecarProxyDefaultMemory: "200Mi", + MaxAllowedScalingDownRatio: 0.5, MinimumCPURequestPerContainer: map[string]string{ "istio-proxy": "100m", "hoge-agent": "120m", @@ -84,15 +91,21 @@ func TestParseConfig(t *testing.T) { TortoiseUpdateInterval: 15 * time.Second, HPATargetUtilizationMaxIncrease: 5, MaximumMinReplicas: 10, - MaximumMaxReplicas: 100, - HPATargetUtilizationUpdateInterval: 24 * time.Hour, - IstioSidecarProxyDefaultCPU: "100m", - IstioSidecarProxyDefaultMemory: "200Mi", - MaxAllowedScalingDownRatio: 0.8, - MinimumCPURequestPerContainer: map[string]string{}, - MinimumMemoryRequestPerContainer: map[string]string{}, - ResourceLimitMultiplier: map[string]int64{}, - BufferRatioOnVerticalResource: 0.1, + ServiceGroups: []ServiceGroup{}, + MaximumMaxReplicas: []MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: &defaultGroupName, + MaximumMaxReplica: 100, + }, + }, + HPATargetUtilizationUpdateInterval: 24 * time.Hour, + IstioSidecarProxyDefaultCPU: "100m", + IstioSidecarProxyDefaultMemory: "200Mi", + MaxAllowedScalingDownRatio: 0.8, + MinimumCPURequestPerContainer: map[string]string{}, + MinimumMemoryRequestPerContainer: map[string]string{}, + ResourceLimitMultiplier: map[string]int64{}, + BufferRatioOnVerticalResource: 0.1, }, }, { @@ -126,15 +139,21 @@ func TestParseConfig(t *testing.T) { TortoiseUpdateInterval: 15 * time.Second, HPATargetUtilizationMaxIncrease: 5, MaximumMinReplicas: 10, - MaximumMaxReplicas: 100, - HPATargetUtilizationUpdateInterval: 24 * time.Hour, - IstioSidecarProxyDefaultCPU: "100m", - IstioSidecarProxyDefaultMemory: "200Mi", - MaxAllowedScalingDownRatio: 0.8, - MinimumCPURequestPerContainer: map[string]string{}, - MinimumMemoryRequestPerContainer: map[string]string{}, - ResourceLimitMultiplier: map[string]int64{}, - BufferRatioOnVerticalResource: 0.1, + ServiceGroups: []ServiceGroup{}, + MaximumMaxReplicas: []MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: &defaultGroupName, + MaximumMaxReplica: 100, + }, + }, + HPATargetUtilizationUpdateInterval: 24 * time.Hour, + IstioSidecarProxyDefaultCPU: "100m", + IstioSidecarProxyDefaultMemory: "200Mi", + MaxAllowedScalingDownRatio: 0.8, + MinimumCPURequestPerContainer: map[string]string{}, + MinimumMemoryRequestPerContainer: map[string]string{}, + ResourceLimitMultiplier: map[string]int64{}, + BufferRatioOnVerticalResource: 0.1, }, }, } @@ -153,6 +172,7 @@ func TestParseConfig(t *testing.T) { } func Test_validate(t *testing.T) { + defaultGroupName := "default" tests := []struct { name string config *Config @@ -205,7 +225,12 @@ func Test_validate(t *testing.T) { HPATargetUtilizationMaxIncrease: 99, MinimumMinReplicas: 2, MaximumMinReplicas: 20, - MaximumMaxReplicas: 10, + MaximumMaxReplicas: []MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: &defaultGroupName, + MaximumMaxReplica: 10, + }, + }, }, wantErr: true, }, @@ -217,21 +242,31 @@ func Test_validate(t *testing.T) { HPATargetUtilizationMaxIncrease: 99, MinimumMinReplicas: 2, MaximumMinReplicas: 20, - MaximumMaxReplicas: 100, - PreferredMaxReplicas: 101, + MaximumMaxReplicas: []MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: &defaultGroupName, + MaximumMaxReplica: 100, + }, + }, + PreferredMaxReplicas: 101, }, wantErr: true, }, { - name: "invalid PreferredMaxReplicas", + name: "invalid PreferredMaxReplicas less than minimum", config: &Config{ RangeOfMinMaxReplicasRecommendationHours: 2, GatheringDataPeriodType: "daily", HPATargetUtilizationMaxIncrease: 99, MinimumMinReplicas: 5, MaximumMinReplicas: 20, - MaximumMaxReplicas: 100, - PreferredMaxReplicas: 4, + MaximumMaxReplicas: []MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: &defaultGroupName, + MaximumMaxReplica: 100, + }, + }, + PreferredMaxReplicas: 4, }, wantErr: true, }, @@ -243,9 +278,15 @@ func Test_validate(t *testing.T) { HPATargetUtilizationMaxIncrease: 99, MinimumMinReplicas: 5, MaximumMinReplicas: 20, - MaximumMaxReplicas: 100, - PreferredMaxReplicas: 6, - MaxAllowedScalingDownRatio: 1.1, + ServiceGroups: []ServiceGroup{}, + MaximumMaxReplicas: []MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: &defaultGroupName, + MaximumMaxReplica: 100, + }, + }, + PreferredMaxReplicas: 6, + MaxAllowedScalingDownRatio: 1.1, }, wantErr: true, }, diff --git a/pkg/hpa/service.go b/pkg/hpa/service.go index 8574982a..0df4444e 100644 --- a/pkg/hpa/service.go +++ b/pkg/hpa/service.go @@ -25,6 +25,7 @@ import ( "github.com/mercari/tortoise/api/v1beta3" autoscalingv1beta3 "github.com/mercari/tortoise/api/v1beta3" + "github.com/mercari/tortoise/pkg/config" "github.com/mercari/tortoise/pkg/event" "github.com/mercari/tortoise/pkg/metrics" "github.com/mercari/tortoise/pkg/utils" @@ -40,8 +41,8 @@ type Service struct { tortoiseHPATargetUtilizationUpdateInterval time.Duration minimumMinReplicas int32 maximumMinReplica int32 - maximumMaxReplica int32 externalMetricExclusionRegex *regexp.Regexp + config *config.Config } func New( @@ -51,9 +52,10 @@ func New( maximumTargetResourceUtilization, tortoiseHPATargetUtilizationMaxIncrease int, tortoiseHPATargetUtilizationUpdateInterval time.Duration, - maximumMinReplica, maximumMaxReplica int32, + maximumMinReplica, minimumMinReplicas int32, externalMetricExclusionRegex string, + config *config.Config, ) (*Service, error) { var regex *regexp.Regexp if externalMetricExclusionRegex != "" { @@ -74,8 +76,8 @@ func New( tortoiseHPATargetUtilizationUpdateInterval: tortoiseHPATargetUtilizationUpdateInterval, maximumMinReplica: maximumMinReplica, minimumMinReplicas: minimumMinReplicas, - maximumMaxReplica: maximumMaxReplica, externalMetricExclusionRegex: regex, + config: config, }, nil } @@ -240,6 +242,21 @@ var globalRecommendedHPABehavior = &v2.HorizontalPodAutoscalerBehavior{ }, } +// Determine which service group is applicable for a given Tortoise using its namespace. +func determineServiceGroup(tortoise *autoscalingv1beta3.Tortoise, cfg *config.Config) string { + tortoiseNamespace := tortoise.Namespace + for _, serviceGroup := range cfg.ServiceGroups { + for _, namespace := range serviceGroup.Namespaces { + if namespace.Name == tortoiseNamespace { + klog.InfoS("Namespace matched", "serviceGroup", serviceGroup.Name, "namespace", tortoiseNamespace) + return serviceGroup.Name + } + } + } + // Returning an empty string to denote a default value when no match. + return "" +} + func (c *Service) CreateHPA(ctx context.Context, tortoise *autoscalingv1beta3.Tortoise, replicaNum int32, now time.Time) (*v2.HorizontalPodAutoscaler, *autoscalingv1beta3.Tortoise, error) { if !HasHorizontal(tortoise) { // no need to create HPA @@ -250,6 +267,10 @@ func (c *Service) CreateHPA(ctx context.Context, tortoise *autoscalingv1beta3.To return nil, tortoise, nil } + // Logic to determine the applicable service group and max replicas. + groupName := determineServiceGroup(tortoise, c.config) + maximumMaxReplicas := c.getMaximumMaxReplicasForGroup(groupName) + hpa := &v2.HorizontalPodAutoscaler{ ObjectMeta: metav1.ObjectMeta{ Name: autoscalingv1beta3.TortoiseDefaultHPAName(tortoise.Name), @@ -262,7 +283,7 @@ func (c *Service) CreateHPA(ctx context.Context, tortoise *autoscalingv1beta3.To APIVersion: tortoise.Spec.TargetRefs.ScaleTargetRef.APIVersion, }, MinReplicas: ptr.To[int32](c.minimumMinReplicas), - MaxReplicas: c.maximumMaxReplica, + MaxReplicas: maximumMaxReplicas, Behavior: globalRecommendedHPABehavior, }, } @@ -275,6 +296,25 @@ func (c *Service) CreateHPA(ctx context.Context, tortoise *autoscalingv1beta3.To return hpa.DeepCopy(), tortoise, err } +// getMaximumMaxReplicasForGroup returns the maximum replicas for a specific service group. +// If the groupName is empty or doesn't match any named group, the function returns the default maximum replicas defined by the group with ServiceGroupName set to "default". +func (c *Service) getMaximumMaxReplicasForGroup(groupName string) int32 { + // Handle the case for the default group first + if groupName == "" { + return c.config.GetDefaultMaximumMaxReplica() + } + + // Look for a specific service group match + for _, group := range c.config.MaximumMaxReplicas { + if group.ServiceGroupName != nil && *group.ServiceGroupName == groupName { + return group.MaximumMaxReplica + } + } + + // Fallback to the default maximum if no match is found + return c.config.GetDefaultMaximumMaxReplica() +} + func (c *Service) GetHPAOnTortoiseSpec(ctx context.Context, tortoise *autoscalingv1beta3.Tortoise) (*v2.HorizontalPodAutoscaler, error) { if tortoise.Spec.TargetRefs.HorizontalPodAutoscalerName == nil { return nil, nil @@ -405,9 +445,13 @@ func (c *Service) ChangeHPAFromTortoiseRecommendation(tortoise *autoscalingv1bet recommendMax = *tortoise.Spec.MaxReplicas } - if recommendMax > c.maximumMaxReplica { - c.recorder.Event(tortoise, corev1.EventTypeWarning, event.WarningHittingHardMaxReplicaLimit, fmt.Sprintf("MaxReplica (%v) suggested from Tortoise (%s/%s) hits a cluster-wide maximum replica number (%v). It wouldn't be a problem until the replica number actually grows to %v though, you may want to reach out to your cluster admin.", recommendMax, tortoise.Namespace, tortoise.Name, c.maximumMaxReplica, c.maximumMaxReplica)) - recommendMax = c.maximumMaxReplica + // Determine the service group and the maximum replicas for that group + groupName := determineServiceGroup(tortoise, c.config) + maximumMaxReplica := c.getMaximumMaxReplicasForGroup(groupName) + + if recommendMax > maximumMaxReplica { + c.recorder.Event(tortoise, corev1.EventTypeWarning, event.WarningHittingHardMaxReplicaLimit, fmt.Sprintf("MaxReplica (%v) suggested from Tortoise (%s/%s) hits a cluster-wide maximum replica number (%v). It wouldn't be a problem until the replica number actually grows to %v though, you may want to reach out to your cluster admin.", recommendMax, tortoise.Namespace, tortoise.Name, maximumMaxReplica, maximumMaxReplica)) + recommendMax = maximumMaxReplica } hpa.Spec.MaxReplicas = recommendMax diff --git a/pkg/hpa/service_test.go b/pkg/hpa/service_test.go index 248dc59b..303caa43 100644 --- a/pkg/hpa/service_test.go +++ b/pkg/hpa/service_test.go @@ -18,6 +18,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client/fake" "github.com/mercari/tortoise/api/v1beta3" + "github.com/mercari/tortoise/pkg/config" ) func TestClient_UpdateHPAFromTortoiseRecommendation(t *testing.T) { @@ -2750,7 +2751,17 @@ func TestClient_UpdateHPAFromTortoiseRecommendation(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - c, err := New(fake.NewClientBuilder().WithRuntimeObjects(tt.initialHPA).Build(), record.NewFakeRecorder(10), 0.95, 90, 50, time.Hour, 1000, 10001, 3, tt.excludeMetricRegex) + // Define a dummy config with maximumMaxReplica set to 10001 for the default group + defaultGroupName := "default" + dummyConfig := &config.Config{ + MaximumMaxReplicas: []config.MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: &defaultGroupName, + MaximumMaxReplica: 10001, + }, + }, + } + c, err := New(fake.NewClientBuilder().WithRuntimeObjects(tt.initialHPA).Build(), record.NewFakeRecorder(10), 0.95, 90, 50, time.Hour, 1000, 3, tt.excludeMetricRegex, dummyConfig) if err != nil { t.Fatalf("New() error = %v", err) } @@ -3063,12 +3074,23 @@ func TestService_InitializeHPA(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - c, err := New(fake.NewClientBuilder().Build(), record.NewFakeRecorder(10), 0.95, 90, 100, time.Hour, 100, 1000, 3, "") + // Define a dummy config with maximumMaxReplica set to 1000 for the default group + defaultGroupName := "default" + dummyConfig := &config.Config{ + MaximumMaxReplicas: []config.MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: &defaultGroupName, + MaximumMaxReplica: 1000, // Set the value you need + }, + }, + // Add other default values if your function logic depends on them + } + c, err := New(fake.NewClientBuilder().Build(), record.NewFakeRecorder(10), 0.95, 90, 100, time.Hour, 100, 3, "", dummyConfig) if err != nil { t.Fatalf("New() error = %v", err) } if tt.initialHPA != nil { - c, err = New(fake.NewClientBuilder().WithRuntimeObjects(tt.initialHPA).Build(), record.NewFakeRecorder(10), 0.95, 90, 100, time.Hour, 100, 1000, 3, "") + c, err = New(fake.NewClientBuilder().WithRuntimeObjects(tt.initialHPA).Build(), record.NewFakeRecorder(10), 0.95, 90, 100, time.Hour, 100, 3, "", dummyConfig) if err != nil { t.Fatalf("New() error = %v", err) } @@ -4621,12 +4643,23 @@ func TestService_UpdateHPASpecFromTortoiseAutoscalingPolicy(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - c, err := New(fake.NewClientBuilder().Build(), record.NewFakeRecorder(10), 0.95, 90, 100, time.Hour, 1000, 10000, 3, "") + // Define a dummy config with maximumMaxReplica set to 10000 for the default group + defaultGroupName := "default" + dummyConfig := &config.Config{ + MaximumMaxReplicas: []config.MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: &defaultGroupName, + MaximumMaxReplica: 10000, // Set the value you need + }, + }, + // Add other default values if your function logic depends on them + } + c, err := New(fake.NewClientBuilder().Build(), record.NewFakeRecorder(10), 0.95, 90, 100, time.Hour, 1000, 3, "", dummyConfig) if err != nil { t.Fatalf("New() error = %v", err) } if tt.initialHPA != nil { - c, err = New(fake.NewClientBuilder().WithRuntimeObjects(tt.initialHPA).Build(), record.NewFakeRecorder(10), 0.95, 90, 100, time.Hour, 1000, 10000, 3, "") + c, err = New(fake.NewClientBuilder().WithRuntimeObjects(tt.initialHPA).Build(), record.NewFakeRecorder(10), 0.95, 90, 100, time.Hour, 1000, 3, "", dummyConfig) if err != nil { t.Fatalf("New() error = %v", err) } From a438211abf7beae4bd520f98bc60bc42ef1abeec Mon Sep 17 00:00:00 2001 From: Bharadwaj Anupindi Date: Wed, 15 Jan 2025 09:15:04 +0900 Subject: [PATCH 2/5] Change design to include backward compatibility --- api/autoscaling/v2/webhook_suite_test.go | 2 +- cmd/main.go | 4 +- .../controller/tortoise_controller_test.go | 15 +-- pkg/config/config.go | 66 +++-------- pkg/config/config_test.go | 105 ++++++------------ pkg/hpa/service.go | 29 +++-- pkg/hpa/service_test.go | 42 +------ 7 files changed, 77 insertions(+), 186 deletions(-) diff --git a/api/autoscaling/v2/webhook_suite_test.go b/api/autoscaling/v2/webhook_suite_test.go index 4beb6773..cb595a8e 100644 --- a/api/autoscaling/v2/webhook_suite_test.go +++ b/api/autoscaling/v2/webhook_suite_test.go @@ -168,7 +168,7 @@ var _ = BeforeSuite(func() { eventRecorder := mgr.GetEventRecorderFor("tortoise-controller") tortoiseService, err := tortoise.New(mgr.GetClient(), eventRecorder, config.RangeOfMinMaxReplicasRecommendationHours, config.TimeZone, config.TortoiseUpdateInterval, config.GatheringDataPeriodType) Expect(err).NotTo(HaveOccurred()) - hpaService, err := hpa.New(mgr.GetClient(), eventRecorder, config.ReplicaReductionFactor, config.MaximumTargetResourceUtilization, 100, time.Hour, 1000, 3, "", config) + hpaService, err := hpa.New(mgr.GetClient(), eventRecorder, config.ReplicaReductionFactor, config.MaximumTargetResourceUtilization, 100, time.Hour, 1000, 10000, 3, config.ServiceGroups, config.MaximumMaxReplicasPerService, "") Expect(err).NotTo(HaveOccurred()) hpaWebhook := New(tortoiseService, hpaService) diff --git a/cmd/main.go b/cmd/main.go index 49ac7e96..d70f1b02 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -152,7 +152,7 @@ func main() { os.Exit(1) } - hpaService, err := hpa.New(mgr.GetClient(), eventRecorder, config.ReplicaReductionFactor, config.MaximumTargetResourceUtilization, config.HPATargetUtilizationMaxIncrease, config.HPATargetUtilizationUpdateInterval, config.MaximumMinReplicas, int32(config.MinimumMinReplicas), config.HPAExternalMetricExclusionRegex, config) + hpaService, err := hpa.New(mgr.GetClient(), eventRecorder, config.ReplicaReductionFactor, config.MaximumTargetResourceUtilization, config.HPATargetUtilizationMaxIncrease, config.HPATargetUtilizationUpdateInterval, config.MaximumMinReplicas, config.MaximumMaxReplicas, int32(config.MinimumMinReplicas), config.ServiceGroups, config.MaximumMaxReplicasPerService, config.HPAExternalMetricExclusionRegex) if err != nil { setupLog.Error(err, "unable to start hpa service") os.Exit(1) @@ -176,7 +176,7 @@ func main() { config.MinimumMemoryRequestPerContainer, config.MaximumCPURequest, config.MaximumMemoryRequest, - config.GetDefaultMaximumMaxReplica(), + config.MaximumMaxReplicas, config.MaxAllowedScalingDownRatio, config.BufferRatioOnVerticalResource, config.FeatureFlags, diff --git a/internal/controller/tortoise_controller_test.go b/internal/controller/tortoise_controller_test.go index 8708f8dc..4ab48476 100644 --- a/internal/controller/tortoise_controller_test.go +++ b/internal/controller/tortoise_controller_test.go @@ -24,7 +24,7 @@ import ( "sigs.k8s.io/yaml" "github.com/mercari/tortoise/api/v1beta3" - configfile "github.com/mercari/tortoise/pkg/config" + config_file "github.com/mercari/tortoise/pkg/config" "github.com/mercari/tortoise/pkg/deployment" "github.com/mercari/tortoise/pkg/features" "github.com/mercari/tortoise/pkg/hpa" @@ -261,18 +261,7 @@ func startController(ctx context.Context) func() { cli, err := vpa.New(mgr.GetConfig(), recorder) Expect(err).ShouldNot(HaveOccurred()) - // Define a dummy config with maximumMaxReplica set to 10000 for the default group - defaultGroupName := "default" - dummyConfig := &configfile.Config{ - MaximumMaxReplicas: []configfile.MaximumMaxReplicasPerGroup{ - { - ServiceGroupName: &defaultGroupName, - MaximumMaxReplica: 10000, // Set the value you need - }, - }, - // Add other default values if your function logic depends on them - } - hpaS, err := hpa.New(mgr.GetClient(), recorder, 0.95, 90, 25, time.Hour, 1000, 3, ".*-exclude-metric", dummyConfig) + hpaS, err := hpa.New(mgr.GetClient(), recorder, 0.95, 90, 25, time.Hour, 1000, 10000, 3, []config_file.ServiceGroup{}, []config_file.MaximumMaxReplicasPerGroup{}, ".*-exclude-metric") Expect(err).ShouldNot(HaveOccurred()) reconciler := &TortoiseReconciler{ Scheme: scheme, diff --git a/pkg/config/config.go b/pkg/config/config.go index 891c9be4..a0aaaec1 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -2,7 +2,6 @@ package config import ( "fmt" - "math" "os" "time" @@ -168,6 +167,11 @@ type Config struct { // a tortoise will ignore `PreferredMaxReplicas`, and increase the number of replicas. // This feature is controlled by the feature flag `VerticalScalingBasedOnPreferredMaxReplicas`. PreferredMaxReplicas int `yaml:"PreferredMaxReplicas"` + // MaximumMaxReplicas is the maximum maxReplica that tortoise can give to the HPA (default: 100) + // Note that this is very dangerous. If you set this value too low, the HPA may not be able to scale up the workload. + // The motivation is to use it has a hard limit to prevent the HPA from scaling up the workload too much in cases of Tortoise's bug, abnormal traffic increase, etc. + // If some Tortoise hits this limit, the tortoise controller emits an error log, which may or may not imply you have to change this value. + MaximumMaxReplicas int32 `yaml:"MaximumMaxReplicas"` // MaximumCPURequest is the maximum CPU cores that the tortoise can give to the container resource request (default: 10) MaximumCPURequest string `yaml:"MaximumCPURequest"` // MaximumMemoryRequest is the maximum memory bytes that the tortoise can give to the container resource request (default: 10Gi) @@ -260,11 +264,9 @@ type Config struct { // serviceGroups defines a list of service category names. ServiceGroups []ServiceGroup `yaml:"ServiceGroups"` - // MaximumMaxReplicas is the maximum maxReplicas that tortoise can give to the HPA per group (default: 100) - // Note that this is very dangerous. If you set this value too low, the HPA may not be able to scale up the workload. - // The motivation is to use it has a hard limit to prevent the HPA from scaling up the workload too much in cases of Tortoise's bug, abnormal traffic increase, etc. - // If some Tortoise hits this limit, the tortoise controller emits an error log, which may or may not imply you have to change this value. - MaximumMaxReplicas []MaximumMaxReplicasPerGroup `yaml:"MaximumMaxReplicas"` + // MaximumMaxReplicasPerService is the maximum maxReplicas that tortoise can give to the HPA per service category. + // If the service category is not found in this list, tortoise uses the default value which is the value set in MaximumMaxReplicas. + MaximumMaxReplicasPerService []MaximumMaxReplicasPerGroup `yaml:"MaximumMaxReplicasPerService"` // FeatureFlags is the list of feature flags (default: empty = all alpha features are disabled) // See the list of feature flags in features.go @@ -294,7 +296,6 @@ type ServiceGroup struct { } func defaultConfig() *Config { - defaultGroupName := "default" return &Config{ RangeOfMinMaxReplicasRecommendationHours: 1, GatheringDataPeriodType: "weekly", @@ -316,22 +317,15 @@ func defaultConfig() *Config { HPATargetUtilizationMaxIncrease: 5, HPATargetUtilizationUpdateInterval: time.Hour * 24, MaximumMinReplicas: 10, + MaximumMaxReplicas: 100, MaxAllowedScalingDownRatio: 0.8, IstioSidecarProxyDefaultCPU: "100m", IstioSidecarProxyDefaultMemory: "200Mi", MinimumCPULimit: "0", ResourceLimitMultiplier: map[string]int64{}, - ServiceGroups: []ServiceGroup{ - // This is an empty slice, indicating that no service groups are defined by default. - }, - MaximumMaxReplicas: []MaximumMaxReplicasPerGroup{ - // This is the default maximum maxReplicas limit for all services. - { - ServiceGroupName: &defaultGroupName, // Applies to all services by default. - MaximumMaxReplica: 100, // Default max replica limit. - }, - }, - BufferRatioOnVerticalResource: 0.1, + ServiceGroups: []ServiceGroup{}, + MaximumMaxReplicasPerService: []MaximumMaxReplicasPerGroup{}, + BufferRatioOnVerticalResource: 0.1, } } @@ -359,16 +353,6 @@ func ParseConfig(path string) (*Config, error) { return config, nil } -// GetDefaultMaxReplica returns the default maximum max replicas from the configuration. -func (cfg *Config) GetDefaultMaximumMaxReplica() int32 { - for _, maxReplicaGroup := range cfg.MaximumMaxReplicas { - if maxReplicaGroup.ServiceGroupName != nil && *maxReplicaGroup.ServiceGroupName == "default" { - return maxReplicaGroup.MaximumMaxReplica - } - } - return 100 // Choose a last resort default value if "default" is not found -} - func validate(config *Config) error { if config.RangeOfMinMaxReplicasRecommendationHours > 24 || config.RangeOfMinMaxReplicasRecommendationHours < 1 { return fmt.Errorf("RangeOfMinMaxReplicasRecommendationHours should be between 1 and 24") @@ -387,27 +371,12 @@ func validate(config *Config) error { return fmt.Errorf("MinimumMinReplicas should be less than MaximumMinReplicas") } - // Check that there is at least one MaximumMaxReplicas - if len(config.MaximumMaxReplicas) == 0 { - return fmt.Errorf("MaximumMaxReplicas must have at least one configuration entry") - } - // Find the minimum value of MaximumMaxReplicas across all service groups - minOfMaximumMaxReplicas := int32(math.MaxInt32) // Start with the largest possible int32 value - var defaultFound bool - for _, group := range config.MaximumMaxReplicas { + minOfMaximumMaxReplicas := config.MaximumMaxReplicas // Start with the default value of MaximumMaxReplicas + for _, group := range config.MaximumMaxReplicasPerService { if group.MaximumMaxReplica < minOfMaximumMaxReplicas { minOfMaximumMaxReplicas = group.MaximumMaxReplica } - // Check for "default" entry - if group.ServiceGroupName != nil && *group.ServiceGroupName == "default" { - defaultFound = true - } - } - - // Ensure that there is an entry with "default" for MaximumMaxReplicas - if !defaultFound { - return fmt.Errorf("There must be at least one MaximumMaxReplicas entry with ServiceGroupName set to \"default\"") } // Check for non-negative values @@ -421,13 +390,8 @@ func validate(config *Config) error { serviceGroupMap[sg.Name] = true } - for _, maxReplicas := range config.MaximumMaxReplicas { + for _, maxReplicas := range config.MaximumMaxReplicasPerService { if maxReplicas.ServiceGroupName != nil { - // Allow "default" to bypass the check of matching ServiceGroups. - if *maxReplicas.ServiceGroupName == "default" { - continue - } - // If not default, ensure it exists in the serviceGroupMap. if _, exists := serviceGroupMap[*maxReplicas.ServiceGroupName]; !exists { return fmt.Errorf("ServiceGroupName %s in MaximumMaxReplicas is not defined in ServiceGroups", *maxReplicas.ServiceGroupName) } diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 3f804a3c..9d6e9809 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -7,7 +7,6 @@ import ( ) func TestParseConfig(t *testing.T) { - defaultGroupName := "default" type args struct { path string } @@ -41,17 +40,13 @@ func TestParseConfig(t *testing.T) { TortoiseUpdateInterval: 1 * time.Hour, HPATargetUtilizationMaxIncrease: 10, MaximumMinReplicas: 10, + MaximumMaxReplicas: 100, + HPATargetUtilizationUpdateInterval: 3 * time.Hour, + IstioSidecarProxyDefaultCPU: "100m", + IstioSidecarProxyDefaultMemory: "200Mi", + MaxAllowedScalingDownRatio: 0.5, ServiceGroups: []ServiceGroup{}, - MaximumMaxReplicas: []MaximumMaxReplicasPerGroup{ - { - ServiceGroupName: &defaultGroupName, - MaximumMaxReplica: 100, - }, - }, - HPATargetUtilizationUpdateInterval: 3 * time.Hour, - IstioSidecarProxyDefaultCPU: "100m", - IstioSidecarProxyDefaultMemory: "200Mi", - MaxAllowedScalingDownRatio: 0.5, + MaximumMaxReplicasPerService: []MaximumMaxReplicasPerGroup{}, MinimumCPURequestPerContainer: map[string]string{ "istio-proxy": "100m", "hoge-agent": "120m", @@ -91,21 +86,17 @@ func TestParseConfig(t *testing.T) { TortoiseUpdateInterval: 15 * time.Second, HPATargetUtilizationMaxIncrease: 5, MaximumMinReplicas: 10, + MaximumMaxReplicas: 100, + HPATargetUtilizationUpdateInterval: 24 * time.Hour, + IstioSidecarProxyDefaultCPU: "100m", + IstioSidecarProxyDefaultMemory: "200Mi", + MaxAllowedScalingDownRatio: 0.8, + MinimumCPURequestPerContainer: map[string]string{}, + MinimumMemoryRequestPerContainer: map[string]string{}, + ResourceLimitMultiplier: map[string]int64{}, ServiceGroups: []ServiceGroup{}, - MaximumMaxReplicas: []MaximumMaxReplicasPerGroup{ - { - ServiceGroupName: &defaultGroupName, - MaximumMaxReplica: 100, - }, - }, - HPATargetUtilizationUpdateInterval: 24 * time.Hour, - IstioSidecarProxyDefaultCPU: "100m", - IstioSidecarProxyDefaultMemory: "200Mi", - MaxAllowedScalingDownRatio: 0.8, - MinimumCPURequestPerContainer: map[string]string{}, - MinimumMemoryRequestPerContainer: map[string]string{}, - ResourceLimitMultiplier: map[string]int64{}, - BufferRatioOnVerticalResource: 0.1, + MaximumMaxReplicasPerService: []MaximumMaxReplicasPerGroup{}, + BufferRatioOnVerticalResource: 0.1, }, }, { @@ -139,21 +130,17 @@ func TestParseConfig(t *testing.T) { TortoiseUpdateInterval: 15 * time.Second, HPATargetUtilizationMaxIncrease: 5, MaximumMinReplicas: 10, + MaximumMaxReplicas: 100, + HPATargetUtilizationUpdateInterval: 24 * time.Hour, + IstioSidecarProxyDefaultCPU: "100m", + IstioSidecarProxyDefaultMemory: "200Mi", + MaxAllowedScalingDownRatio: 0.8, + MinimumCPURequestPerContainer: map[string]string{}, + MinimumMemoryRequestPerContainer: map[string]string{}, + ResourceLimitMultiplier: map[string]int64{}, ServiceGroups: []ServiceGroup{}, - MaximumMaxReplicas: []MaximumMaxReplicasPerGroup{ - { - ServiceGroupName: &defaultGroupName, - MaximumMaxReplica: 100, - }, - }, - HPATargetUtilizationUpdateInterval: 24 * time.Hour, - IstioSidecarProxyDefaultCPU: "100m", - IstioSidecarProxyDefaultMemory: "200Mi", - MaxAllowedScalingDownRatio: 0.8, - MinimumCPURequestPerContainer: map[string]string{}, - MinimumMemoryRequestPerContainer: map[string]string{}, - ResourceLimitMultiplier: map[string]int64{}, - BufferRatioOnVerticalResource: 0.1, + MaximumMaxReplicasPerService: []MaximumMaxReplicasPerGroup{}, + BufferRatioOnVerticalResource: 0.1, }, }, } @@ -172,7 +159,6 @@ func TestParseConfig(t *testing.T) { } func Test_validate(t *testing.T) { - defaultGroupName := "default" tests := []struct { name string config *Config @@ -225,12 +211,7 @@ func Test_validate(t *testing.T) { HPATargetUtilizationMaxIncrease: 99, MinimumMinReplicas: 2, MaximumMinReplicas: 20, - MaximumMaxReplicas: []MaximumMaxReplicasPerGroup{ - { - ServiceGroupName: &defaultGroupName, - MaximumMaxReplica: 10, - }, - }, + MaximumMaxReplicas: 10, }, wantErr: true, }, @@ -242,31 +223,21 @@ func Test_validate(t *testing.T) { HPATargetUtilizationMaxIncrease: 99, MinimumMinReplicas: 2, MaximumMinReplicas: 20, - MaximumMaxReplicas: []MaximumMaxReplicasPerGroup{ - { - ServiceGroupName: &defaultGroupName, - MaximumMaxReplica: 100, - }, - }, - PreferredMaxReplicas: 101, + MaximumMaxReplicas: 100, + PreferredMaxReplicas: 101, }, wantErr: true, }, { - name: "invalid PreferredMaxReplicas less than minimum", + name: "invalid PreferredMaxReplicas less than MinimumMinReplicas", config: &Config{ RangeOfMinMaxReplicasRecommendationHours: 2, GatheringDataPeriodType: "daily", HPATargetUtilizationMaxIncrease: 99, MinimumMinReplicas: 5, MaximumMinReplicas: 20, - MaximumMaxReplicas: []MaximumMaxReplicasPerGroup{ - { - ServiceGroupName: &defaultGroupName, - MaximumMaxReplica: 100, - }, - }, - PreferredMaxReplicas: 4, + MaximumMaxReplicas: 100, + PreferredMaxReplicas: 4, }, wantErr: true, }, @@ -278,15 +249,9 @@ func Test_validate(t *testing.T) { HPATargetUtilizationMaxIncrease: 99, MinimumMinReplicas: 5, MaximumMinReplicas: 20, - ServiceGroups: []ServiceGroup{}, - MaximumMaxReplicas: []MaximumMaxReplicasPerGroup{ - { - ServiceGroupName: &defaultGroupName, - MaximumMaxReplica: 100, - }, - }, - PreferredMaxReplicas: 6, - MaxAllowedScalingDownRatio: 1.1, + MaximumMaxReplicas: 100, + PreferredMaxReplicas: 6, + MaxAllowedScalingDownRatio: 1.1, }, wantErr: true, }, diff --git a/pkg/hpa/service.go b/pkg/hpa/service.go index 0df4444e..86a77672 100644 --- a/pkg/hpa/service.go +++ b/pkg/hpa/service.go @@ -41,8 +41,10 @@ type Service struct { tortoiseHPATargetUtilizationUpdateInterval time.Duration minimumMinReplicas int32 maximumMinReplica int32 + maximumMaxReplica int32 + serviceGroups []config.ServiceGroup + maximumMaxReplicasPerService []config.MaximumMaxReplicasPerGroup externalMetricExclusionRegex *regexp.Regexp - config *config.Config } func New( @@ -52,10 +54,11 @@ func New( maximumTargetResourceUtilization, tortoiseHPATargetUtilizationMaxIncrease int, tortoiseHPATargetUtilizationUpdateInterval time.Duration, - maximumMinReplica, + maximumMinReplica, maximumMaxReplica int32, minimumMinReplicas int32, + serviceGroups []config.ServiceGroup, + maximumMaxReplicasPerService []config.MaximumMaxReplicasPerGroup, externalMetricExclusionRegex string, - config *config.Config, ) (*Service, error) { var regex *regexp.Regexp if externalMetricExclusionRegex != "" { @@ -76,8 +79,10 @@ func New( tortoiseHPATargetUtilizationUpdateInterval: tortoiseHPATargetUtilizationUpdateInterval, maximumMinReplica: maximumMinReplica, minimumMinReplicas: minimumMinReplicas, + maximumMaxReplica: maximumMaxReplica, + serviceGroups: serviceGroups, + maximumMaxReplicasPerService: maximumMaxReplicasPerService, externalMetricExclusionRegex: regex, - config: config, }, nil } @@ -243,9 +248,9 @@ var globalRecommendedHPABehavior = &v2.HorizontalPodAutoscalerBehavior{ } // Determine which service group is applicable for a given Tortoise using its namespace. -func determineServiceGroup(tortoise *autoscalingv1beta3.Tortoise, cfg *config.Config) string { +func (c *Service) determineServiceGroup(tortoise *autoscalingv1beta3.Tortoise) string { tortoiseNamespace := tortoise.Namespace - for _, serviceGroup := range cfg.ServiceGroups { + for _, serviceGroup := range c.serviceGroups { for _, namespace := range serviceGroup.Namespaces { if namespace.Name == tortoiseNamespace { klog.InfoS("Namespace matched", "serviceGroup", serviceGroup.Name, "namespace", tortoiseNamespace) @@ -268,7 +273,7 @@ func (c *Service) CreateHPA(ctx context.Context, tortoise *autoscalingv1beta3.To } // Logic to determine the applicable service group and max replicas. - groupName := determineServiceGroup(tortoise, c.config) + groupName := c.determineServiceGroup(tortoise) maximumMaxReplicas := c.getMaximumMaxReplicasForGroup(groupName) hpa := &v2.HorizontalPodAutoscaler{ @@ -297,22 +302,22 @@ func (c *Service) CreateHPA(ctx context.Context, tortoise *autoscalingv1beta3.To } // getMaximumMaxReplicasForGroup returns the maximum replicas for a specific service group. -// If the groupName is empty or doesn't match any named group, the function returns the default maximum replicas defined by the group with ServiceGroupName set to "default". +// If the groupName is empty or doesn't match any named group, the function returns the default maximum replicas defined by the MaximumMaxReplicas. func (c *Service) getMaximumMaxReplicasForGroup(groupName string) int32 { // Handle the case for the default group first if groupName == "" { - return c.config.GetDefaultMaximumMaxReplica() + return c.maximumMaxReplica } // Look for a specific service group match - for _, group := range c.config.MaximumMaxReplicas { + for _, group := range c.maximumMaxReplicasPerService { if group.ServiceGroupName != nil && *group.ServiceGroupName == groupName { return group.MaximumMaxReplica } } // Fallback to the default maximum if no match is found - return c.config.GetDefaultMaximumMaxReplica() + return c.maximumMaxReplica } func (c *Service) GetHPAOnTortoiseSpec(ctx context.Context, tortoise *autoscalingv1beta3.Tortoise) (*v2.HorizontalPodAutoscaler, error) { @@ -446,7 +451,7 @@ func (c *Service) ChangeHPAFromTortoiseRecommendation(tortoise *autoscalingv1bet } // Determine the service group and the maximum replicas for that group - groupName := determineServiceGroup(tortoise, c.config) + groupName := c.determineServiceGroup(tortoise) maximumMaxReplica := c.getMaximumMaxReplicasForGroup(groupName) if recommendMax > maximumMaxReplica { diff --git a/pkg/hpa/service_test.go b/pkg/hpa/service_test.go index 303caa43..2a4acf33 100644 --- a/pkg/hpa/service_test.go +++ b/pkg/hpa/service_test.go @@ -2751,17 +2751,7 @@ func TestClient_UpdateHPAFromTortoiseRecommendation(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - // Define a dummy config with maximumMaxReplica set to 10001 for the default group - defaultGroupName := "default" - dummyConfig := &config.Config{ - MaximumMaxReplicas: []config.MaximumMaxReplicasPerGroup{ - { - ServiceGroupName: &defaultGroupName, - MaximumMaxReplica: 10001, - }, - }, - } - c, err := New(fake.NewClientBuilder().WithRuntimeObjects(tt.initialHPA).Build(), record.NewFakeRecorder(10), 0.95, 90, 50, time.Hour, 1000, 3, tt.excludeMetricRegex, dummyConfig) + c, err := New(fake.NewClientBuilder().WithRuntimeObjects(tt.initialHPA).Build(), record.NewFakeRecorder(10), 0.95, 90, 50, time.Hour, 1000, 10001, 3, []config.ServiceGroup{}, []config.MaximumMaxReplicasPerGroup{}, tt.excludeMetricRegex) if err != nil { t.Fatalf("New() error = %v", err) } @@ -3074,23 +3064,12 @@ func TestService_InitializeHPA(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - // Define a dummy config with maximumMaxReplica set to 1000 for the default group - defaultGroupName := "default" - dummyConfig := &config.Config{ - MaximumMaxReplicas: []config.MaximumMaxReplicasPerGroup{ - { - ServiceGroupName: &defaultGroupName, - MaximumMaxReplica: 1000, // Set the value you need - }, - }, - // Add other default values if your function logic depends on them - } - c, err := New(fake.NewClientBuilder().Build(), record.NewFakeRecorder(10), 0.95, 90, 100, time.Hour, 100, 3, "", dummyConfig) + c, err := New(fake.NewClientBuilder().Build(), record.NewFakeRecorder(10), 0.95, 90, 100, time.Hour, 100, 1000, 3, []config.ServiceGroup{}, []config.MaximumMaxReplicasPerGroup{}, "") if err != nil { t.Fatalf("New() error = %v", err) } if tt.initialHPA != nil { - c, err = New(fake.NewClientBuilder().WithRuntimeObjects(tt.initialHPA).Build(), record.NewFakeRecorder(10), 0.95, 90, 100, time.Hour, 100, 3, "", dummyConfig) + c, err = New(fake.NewClientBuilder().WithRuntimeObjects(tt.initialHPA).Build(), record.NewFakeRecorder(10), 0.95, 90, 100, time.Hour, 100, 1000, 3, []config.ServiceGroup{}, []config.MaximumMaxReplicasPerGroup{}, "") if err != nil { t.Fatalf("New() error = %v", err) } @@ -4643,23 +4622,12 @@ func TestService_UpdateHPASpecFromTortoiseAutoscalingPolicy(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - // Define a dummy config with maximumMaxReplica set to 10000 for the default group - defaultGroupName := "default" - dummyConfig := &config.Config{ - MaximumMaxReplicas: []config.MaximumMaxReplicasPerGroup{ - { - ServiceGroupName: &defaultGroupName, - MaximumMaxReplica: 10000, // Set the value you need - }, - }, - // Add other default values if your function logic depends on them - } - c, err := New(fake.NewClientBuilder().Build(), record.NewFakeRecorder(10), 0.95, 90, 100, time.Hour, 1000, 3, "", dummyConfig) + c, err := New(fake.NewClientBuilder().Build(), record.NewFakeRecorder(10), 0.95, 90, 100, time.Hour, 1000, 10000, 3, []config.ServiceGroup{}, []config.MaximumMaxReplicasPerGroup{}, "") if err != nil { t.Fatalf("New() error = %v", err) } if tt.initialHPA != nil { - c, err = New(fake.NewClientBuilder().WithRuntimeObjects(tt.initialHPA).Build(), record.NewFakeRecorder(10), 0.95, 90, 100, time.Hour, 1000, 3, "", dummyConfig) + c, err = New(fake.NewClientBuilder().WithRuntimeObjects(tt.initialHPA).Build(), record.NewFakeRecorder(10), 0.95, 90, 100, time.Hour, 1000, 10000, 3, []config.ServiceGroup{}, []config.MaximumMaxReplicasPerGroup{}, "") if err != nil { t.Fatalf("New() error = %v", err) } From e93ac1b6d3eedcbf5c3c40c0619c16517d0a9b99 Mon Sep 17 00:00:00 2001 From: Bharadwaj Anupindi Date: Thu, 16 Jan 2025 10:41:45 +0900 Subject: [PATCH 3/5] Field name change from Namespace to Selectors --- pkg/config/config.go | 24 +++++++++++++++--------- pkg/hpa/service.go | 7 ++++--- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/pkg/config/config.go b/pkg/config/config.go index a0aaaec1..5b4c8c31 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -275,24 +275,23 @@ type Config struct { type MaximumMaxReplicasPerGroup struct { // ServiceGroupName refers to one ServiceGroup at Config.ServiceGroups - // If nil, this MaximumMaxReplica would apply to all services. ServiceGroupName *string `yaml:"ServiceGroupName"` MaximumMaxReplica int32 `yaml:"MaximumMaxReplica"` } -// Namespace represents a Kubernetes namespace and its associated label selectors. -type Namespace struct { - Name string `yaml:"name"` // Namespace name - LabelSelectors []*metav1.LabelSelector `yaml:"labelSelectors"` // Slice of label selectors within this namespace +// Selector selects a group of pods by matching its namespace and labels. +type Selector struct { + Namespace string `yaml:"Namespace"` // Namespace name + LabelSelectors []*metav1.LabelSelector `yaml:"Labels,omitempty"` // Slice of label selectors within this namespace } // ServiceGroup represents a collection of services grouped together with namespace awareness. type ServiceGroup struct { // Name is the group's name (e.g., big-service, fintech-service, etc). - Name string `yaml:"name"` - // Namespaces represent multiple namespaces with their label selectors. - Namespaces []Namespace `yaml:"namespaces"` // A slice of Namespace structs + Name string `yaml:"Name"` + // Selectors represent multiple pod groups that belong to this ServiceGroup. + Selectors []Selector `yaml:"Selectors"` // Slice of namespaces and labels within this service group } func defaultConfig() *Config { @@ -402,11 +401,18 @@ func validate(config *Config) error { seenServiceGroups := make(map[string]bool) for _, sg := range config.ServiceGroups { if seenServiceGroups[sg.Name] { - return fmt.Errorf("Duplicate ServiceGroupName found: %s", sg.Name) + return fmt.Errorf("duplicate ServiceGroupName found: %s", sg.Name) } seenServiceGroups[sg.Name] = true } + // Check all entries in MaximumMaxReplicasPerService have non-nil ServiceGroupName + for _, maxReplicas := range config.MaximumMaxReplicasPerService { + if maxReplicas.ServiceGroupName == nil { + return fmt.Errorf("ServiceGroupName should not be nil in MaximumMaxReplicasPerService entries") + } + } + if config.MaximumMinReplicas > minOfMaximumMaxReplicas { return fmt.Errorf("MaximumMinReplicas should be less than or equal to MaximumMaxReplicas") } diff --git a/pkg/hpa/service.go b/pkg/hpa/service.go index 86a77672..3d95d018 100644 --- a/pkg/hpa/service.go +++ b/pkg/hpa/service.go @@ -247,15 +247,16 @@ var globalRecommendedHPABehavior = &v2.HorizontalPodAutoscalerBehavior{ }, } -// Determine which service group is applicable for a given Tortoise using its namespace. +// Determine which service group is applicable for a given Tortoise using its selectors. func (c *Service) determineServiceGroup(tortoise *autoscalingv1beta3.Tortoise) string { tortoiseNamespace := tortoise.Namespace for _, serviceGroup := range c.serviceGroups { - for _, namespace := range serviceGroup.Namespaces { - if namespace.Name == tortoiseNamespace { + for _, selector := range serviceGroup.Selectors { + if selector.Namespace == tortoiseNamespace { klog.InfoS("Namespace matched", "serviceGroup", serviceGroup.Name, "namespace", tortoiseNamespace) return serviceGroup.Name } + // TODO(avs): Add logic for matching labels in the future. } } // Returning an empty string to denote a default value when no match. From 00134fe49a694e870f536fb1c05c49c3136a7df8 Mon Sep 17 00:00:00 2001 From: Bharadwaj Anupindi Date: Tue, 21 Jan 2025 18:36:51 +0900 Subject: [PATCH 4/5] Added test cases for scenarios with sericeGroups and MaximumMaxReplicasPerGroup --- pkg/config/config.go | 12 +- pkg/hpa/service.go | 2 +- pkg/hpa/service_test.go | 450 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 457 insertions(+), 7 deletions(-) diff --git a/pkg/config/config.go b/pkg/config/config.go index 5b4c8c31..1d896a45 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -262,7 +262,7 @@ type Config struct { // IstioSidecarProxyDefaultMemory is the default Memory resource request of the istio sidecar proxy (default: 200Mi) IstioSidecarProxyDefaultMemory string `yaml:"IstioSidecarProxyDefaultMemory"` - // serviceGroups defines a list of service category names. + // ServiceGroups defines a list of service category names. ServiceGroups []ServiceGroup `yaml:"ServiceGroups"` // MaximumMaxReplicasPerService is the maximum maxReplicas that tortoise can give to the HPA per service category. // If the service category is not found in this list, tortoise uses the default value which is the value set in MaximumMaxReplicas. @@ -275,7 +275,7 @@ type Config struct { type MaximumMaxReplicasPerGroup struct { // ServiceGroupName refers to one ServiceGroup at Config.ServiceGroups - ServiceGroupName *string `yaml:"ServiceGroupName"` + ServiceGroupName string `yaml:"ServiceGroupName"` MaximumMaxReplica int32 `yaml:"MaximumMaxReplica"` } @@ -390,9 +390,9 @@ func validate(config *Config) error { } for _, maxReplicas := range config.MaximumMaxReplicasPerService { - if maxReplicas.ServiceGroupName != nil { - if _, exists := serviceGroupMap[*maxReplicas.ServiceGroupName]; !exists { - return fmt.Errorf("ServiceGroupName %s in MaximumMaxReplicas is not defined in ServiceGroups", *maxReplicas.ServiceGroupName) + if maxReplicas.ServiceGroupName != "" { + if _, exists := serviceGroupMap[maxReplicas.ServiceGroupName]; !exists { + return fmt.Errorf("ServiceGroupName %s in MaximumMaxReplicas is not defined in ServiceGroups", maxReplicas.ServiceGroupName) } } } @@ -408,7 +408,7 @@ func validate(config *Config) error { // Check all entries in MaximumMaxReplicasPerService have non-nil ServiceGroupName for _, maxReplicas := range config.MaximumMaxReplicasPerService { - if maxReplicas.ServiceGroupName == nil { + if maxReplicas.ServiceGroupName == "" { return fmt.Errorf("ServiceGroupName should not be nil in MaximumMaxReplicasPerService entries") } } diff --git a/pkg/hpa/service.go b/pkg/hpa/service.go index 3d95d018..66dc5838 100644 --- a/pkg/hpa/service.go +++ b/pkg/hpa/service.go @@ -312,7 +312,7 @@ func (c *Service) getMaximumMaxReplicasForGroup(groupName string) int32 { // Look for a specific service group match for _, group := range c.maximumMaxReplicasPerService { - if group.ServiceGroupName != nil && *group.ServiceGroupName == groupName { + if group.ServiceGroupName != "" && group.ServiceGroupName == groupName { return group.MaximumMaxReplica } } diff --git a/pkg/hpa/service_test.go b/pkg/hpa/service_test.go index 2a4acf33..84f96cb3 100644 --- a/pkg/hpa/service_test.go +++ b/pkg/hpa/service_test.go @@ -5014,3 +5014,453 @@ func TestService_IsHpaMetricAvailable(t *testing.T) { }) } } + +func TestHPAServiceGroupReplicaLimits(t *testing.T) { + baseHPA := &v2.HorizontalPodAutoscaler{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hpa", + Namespace: "default", + }, + Spec: v2.HorizontalPodAutoscalerSpec{ + MinReplicas: ptrInt32(3), + MaxReplicas: 50, + ScaleTargetRef: v2.CrossVersionObjectReference{ + Kind: "Deployment", + Name: "test", + APIVersion: "apps/v1", + }, + }, + } + + baseTortoise := &v1beta3.Tortoise{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-tortoise", + Namespace: "default", + }, + Spec: v1beta3.TortoiseSpec{ + TargetRefs: v1beta3.TargetRefs{ + HorizontalPodAutoscalerName: ptr.To("test-hpa"), + ScaleTargetRef: v1beta3.CrossVersionObjectReference{ + Kind: "Deployment", + Name: "test", + }, + }, + }, + } + + type args struct { + ctx context.Context + tortoise *v1beta3.Tortoise + replicaNum int32 + } + + tests := []struct { + name string + args args + initialHPA *v2.HorizontalPodAutoscaler + afterHPA *v2.HorizontalPodAutoscaler + wantTortoise *v1beta3.Tortoise + serviceGroups []config.ServiceGroup + maxReplicasPerService []config.MaximumMaxReplicasPerGroup + wantErr bool + }{ + { + name: "valid service group with specific max replicas", + args: args{ + ctx: context.Background(), + tortoise: baseTortoise.DeepCopy(), + replicaNum: 10, + }, + initialHPA: baseHPA.DeepCopy(), + serviceGroups: []config.ServiceGroup{ + { + Name: "test-group", + Selectors: []config.Selector{ + { + Namespace: "default", + }, + }, + }, + }, + maxReplicasPerService: []config.MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: "test-group", + MaximumMaxReplica: 30, + }, + }, + afterHPA: func() *v2.HorizontalPodAutoscaler { + hpa := baseHPA.DeepCopy() + hpa.Spec.MinReplicas = ptr.To[int32](10) + hpa.Spec.MaxReplicas = 10 + return hpa + }(), + wantTortoise: baseTortoise.DeepCopy(), + wantErr: false, + }, + { + name: "empty service groups but with maxReplicasPerService", + args: args{ + ctx: context.Background(), + tortoise: baseTortoise.DeepCopy(), + replicaNum: 5, + }, + initialHPA: baseHPA.DeepCopy(), + serviceGroups: []config.ServiceGroup{}, + maxReplicasPerService: []config.MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: "non-existent-group", + MaximumMaxReplica: 30, + }, + }, + afterHPA: func() *v2.HorizontalPodAutoscaler { + hpa := baseHPA.DeepCopy() + hpa.Spec.MinReplicas = ptr.To[int32](5) + hpa.Spec.MaxReplicas = 5 + return hpa + }(), + wantTortoise: baseTortoise.DeepCopy(), + wantErr: false, + }, + { + name: "service group config with zero maxReplicas falls back to global max", + args: args{ + ctx: context.Background(), + tortoise: baseTortoise.DeepCopy(), + replicaNum: 5, + }, + initialHPA: baseHPA.DeepCopy(), + serviceGroups: []config.ServiceGroup{ + { + Name: "test-group", + Selectors: []config.Selector{ + { + Namespace: "default", + }, + }, + }, + }, + maxReplicasPerService: []config.MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: "test-group", + MaximumMaxReplica: 0, + }, + }, + afterHPA: func() *v2.HorizontalPodAutoscaler { + hpa := baseHPA.DeepCopy() + hpa.Spec.MinReplicas = ptr.To[int32](5) + hpa.Spec.MaxReplicas = 5 + return hpa + }(), + wantTortoise: baseTortoise.DeepCopy(), + wantErr: false, // Changed to false as zero value should fall back to global max + }, + { + name: "multiple service groups with different maxReplicas", + args: args{ + ctx: context.Background(), + tortoise: baseTortoise.DeepCopy(), + replicaNum: 8, + }, + initialHPA: baseHPA.DeepCopy(), + serviceGroups: []config.ServiceGroup{ + { + Name: "group-1", + Selectors: []config.Selector{ + { + Namespace: "default", + }, + }, + }, + { + Name: "group-2", + Selectors: []config.Selector{ + { + Namespace: "default", + }, + }, + }, + }, + maxReplicasPerService: []config.MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: "group-1", + MaximumMaxReplica: 40, + }, + { + ServiceGroupName: "group-2", + MaximumMaxReplica: 20, + }, + }, + afterHPA: func() *v2.HorizontalPodAutoscaler { + hpa := baseHPA.DeepCopy() + hpa.Spec.MinReplicas = ptr.To[int32](8) + hpa.Spec.MaxReplicas = 8 + return hpa + }(), + wantTortoise: baseTortoise.DeepCopy(), + wantErr: false, + }, + { + name: "both service groups and maxReplicasPerService nil", + args: args{ + ctx: context.Background(), + tortoise: baseTortoise.DeepCopy(), + replicaNum: 15, + }, + initialHPA: baseHPA.DeepCopy(), + serviceGroups: nil, + maxReplicasPerService: nil, + afterHPA: func() *v2.HorizontalPodAutoscaler { + hpa := baseHPA.DeepCopy() + hpa.Spec.MinReplicas = ptr.To[int32](15) + hpa.Spec.MaxReplicas = 15 + return hpa + }(), + wantTortoise: baseTortoise.DeepCopy(), + wantErr: false, + }, + { + name: "service group defined but empty maxReplicasPerService slice", + args: args{ + ctx: context.Background(), + tortoise: baseTortoise.DeepCopy(), + replicaNum: 7, + }, + initialHPA: baseHPA.DeepCopy(), + serviceGroups: []config.ServiceGroup{ + { + Name: "test-group", + Selectors: []config.Selector{ + { + Namespace: "default", + }, + }, + }, + }, + maxReplicasPerService: []config.MaximumMaxReplicasPerGroup{}, + afterHPA: func() *v2.HorizontalPodAutoscaler { + hpa := baseHPA.DeepCopy() + hpa.Spec.MinReplicas = ptr.To[int32](7) + hpa.Spec.MaxReplicas = 7 + return hpa + }(), + wantTortoise: baseTortoise.DeepCopy(), + wantErr: false, + }, + { + name: "service in different namespace than group selector", + args: args{ + ctx: context.Background(), + tortoise: func() *v1beta3.Tortoise { + t := baseTortoise.DeepCopy() + t.Namespace = "other-namespace" + return t + }(), + replicaNum: 6, + }, + initialHPA: func() *v2.HorizontalPodAutoscaler { + hpa := baseHPA.DeepCopy() + hpa.Namespace = "other-namespace" + return hpa + }(), + serviceGroups: []config.ServiceGroup{ + { + Name: "test-group", + Selectors: []config.Selector{ + { + Namespace: "default", // Different namespace + }, + }, + }, + }, + maxReplicasPerService: []config.MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: "test-group", + MaximumMaxReplica: 20, + }, + }, + afterHPA: func() *v2.HorizontalPodAutoscaler { + hpa := baseHPA.DeepCopy() + hpa.Namespace = "other-namespace" + hpa.Spec.MinReplicas = ptr.To[int32](6) + hpa.Spec.MaxReplicas = 6 + return hpa + }(), + wantTortoise: func() *v1beta3.Tortoise { + t := baseTortoise.DeepCopy() + t.Namespace = "other-namespace" + return t + }(), + wantErr: false, + }, + { + name: "maxReplicasPerService references non-existent service group", + args: args{ + ctx: context.Background(), + tortoise: baseTortoise.DeepCopy(), + replicaNum: 4, + }, + initialHPA: baseHPA.DeepCopy(), + serviceGroups: []config.ServiceGroup{ + { + Name: "existing-group", + Selectors: []config.Selector{ + { + Namespace: "default", + }, + }, + }, + }, + maxReplicasPerService: []config.MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: "non-existent-group", + MaximumMaxReplica: 25, + }, + }, + afterHPA: func() *v2.HorizontalPodAutoscaler { + hpa := baseHPA.DeepCopy() + hpa.Spec.MinReplicas = ptr.To[int32](4) + hpa.Spec.MaxReplicas = 4 + return hpa + }(), + wantTortoise: baseTortoise.DeepCopy(), + wantErr: false, + }, + { + name: "duplicate service group names with different maxReplicas", + args: args{ + ctx: context.Background(), + tortoise: baseTortoise.DeepCopy(), + replicaNum: 9, + }, + initialHPA: baseHPA.DeepCopy(), + serviceGroups: []config.ServiceGroup{ + { + Name: "same-group", + Selectors: []config.Selector{ + { + Namespace: "default", + }, + }, + }, + { + Name: "same-group", // Duplicate name + Selectors: []config.Selector{ + { + Namespace: "default", + }, + }, + }, + }, + maxReplicasPerService: []config.MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: "same-group", + MaximumMaxReplica: 15, + }, + }, + afterHPA: func() *v2.HorizontalPodAutoscaler { + hpa := baseHPA.DeepCopy() + hpa.Spec.MinReplicas = ptr.To[int32](9) + hpa.Spec.MaxReplicas = 9 + return hpa + }(), + wantTortoise: baseTortoise.DeepCopy(), + wantErr: false, + }, + { + name: "service group with negative maxReplicas", + args: args{ + ctx: context.Background(), + tortoise: baseTortoise.DeepCopy(), + replicaNum: 5, + }, + initialHPA: baseHPA.DeepCopy(), + serviceGroups: []config.ServiceGroup{ + { + Name: "test-group", + Selectors: []config.Selector{ + { + Namespace: "default", + }, + }, + }, + }, + maxReplicasPerService: []config.MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: "test-group", + MaximumMaxReplica: -10, + }, + }, + afterHPA: func() *v2.HorizontalPodAutoscaler { + hpa := baseHPA.DeepCopy() + hpa.Spec.MinReplicas = ptr.To[int32](5) + hpa.Spec.MaxReplicas = 5 + return hpa + }(), + wantTortoise: baseTortoise.DeepCopy(), + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create a fake client with the initial HPA if it exists + clientBuilder := fake.NewClientBuilder() + if tt.initialHPA != nil { + clientBuilder = clientBuilder.WithRuntimeObjects(tt.initialHPA) + } + + // Create a new service instance with the test configuration + c, err := New( + clientBuilder.Build(), + record.NewFakeRecorder(10), + 0.95, // ReplicaReductionFactor + 90, // MaximumTargetResourceUtilization + 50, // MaximumMaxReplicas (global) + time.Hour, // HPATargetUtilizationUpdateInterval + 3, // MinimumMinReplicas + 100, // MaximumMinReplicas + 70, // MinimumTargetResourceUtilization + tt.serviceGroups, // Service Groups + tt.maxReplicasPerService, // MaxReplicasPerService + "", // HPAExternalMetricExclusionRegex + ) + if err != nil { + t.Fatalf("New() error = %v", err) + } + + var givenHPA *v2.HorizontalPodAutoscaler + if tt.args.tortoise.Spec.TargetRefs.HorizontalPodAutoscalerName != nil { + givenHPA = tt.initialHPA + } + + tortoise, err := c.UpdateHPASpecFromTortoiseAutoscalingPolicy( + tt.args.ctx, + tt.args.tortoise, + givenHPA, + tt.args.replicaNum, + time.Now(), + ) + if (err != nil) != tt.wantErr { + t.Errorf("UpdateHPASpecFromTortoiseAutoscalingPolicy() error = %v, wantErr %v", err, tt.wantErr) + return + } + + if !tt.wantErr { + if d := cmp.Diff(tt.wantTortoise, tortoise, cmpopts.IgnoreTypes(metav1.Time{})); d != "" { + t.Errorf("UpdateHPASpecFromTortoiseAutoscalingPolicy() tortoise diff = %v", d) + } + + hpa := &v2.HorizontalPodAutoscaler{} + err = c.c.Get(tt.args.ctx, client.ObjectKey{Name: tt.afterHPA.Name, Namespace: tt.afterHPA.Namespace}, hpa) + if err != nil { + t.Errorf("get hpa error = %v", err) + } + if d := cmp.Diff(tt.afterHPA, hpa, + cmpopts.IgnoreFields(v2.HorizontalPodAutoscaler{}, "TypeMeta"), + cmpopts.IgnoreFields(metav1.ObjectMeta{}, "ResourceVersion")); d != "" { + t.Errorf("UpdateHPASpecFromTortoiseAutoscalingPolicy() hpa diff = %v", d) + } + } + }) + } +} From c15336c3856604ed8cba35567148e9a4f8cc7e68 Mon Sep 17 00:00:00 2001 From: Bharadwaj Anupindi Date: Wed, 22 Jan 2025 10:16:34 +0900 Subject: [PATCH 5/5] fix call to New() --- pkg/hpa/service_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/hpa/service_test.go b/pkg/hpa/service_test.go index 84f96cb3..47066f99 100644 --- a/pkg/hpa/service_test.go +++ b/pkg/hpa/service_test.go @@ -5002,7 +5002,7 @@ func TestService_IsHpaMetricAvailable(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - c, err := New(fake.NewClientBuilder().Build(), record.NewFakeRecorder(10), 0.95, 90, 100, time.Hour, 100, 1000, 3, "") + c, err := New(fake.NewClientBuilder().Build(), record.NewFakeRecorder(10), 0.95, 90, 100, time.Hour, 100, 1000, 3, []config.ServiceGroup{}, []config.MaximumMaxReplicasPerGroup{}, "") if err != nil { t.Fatalf("New() error = %v", err) } @@ -5152,7 +5152,7 @@ func TestHPAServiceGroupReplicaLimits(t *testing.T) { return hpa }(), wantTortoise: baseTortoise.DeepCopy(), - wantErr: false, // Changed to false as zero value should fall back to global max + wantErr: false, }, { name: "multiple service groups with different maxReplicas",