diff --git a/api/autoscaling/v2/webhook_suite_test.go b/api/autoscaling/v2/webhook_suite_test.go index 8115e02e..4beb6773 100644 --- a/api/autoscaling/v2/webhook_suite_test.go +++ b/api/autoscaling/v2/webhook_suite_test.go @@ -168,7 +168,7 @@ var _ = BeforeSuite(func() { eventRecorder := mgr.GetEventRecorderFor("tortoise-controller") tortoiseService, err := tortoise.New(mgr.GetClient(), eventRecorder, config.RangeOfMinMaxReplicasRecommendationHours, config.TimeZone, config.TortoiseUpdateInterval, config.GatheringDataPeriodType) Expect(err).NotTo(HaveOccurred()) - hpaService, err := hpa.New(mgr.GetClient(), eventRecorder, config.ReplicaReductionFactor, config.MaximumTargetResourceUtilization, 100, time.Hour, 1000, 10000, 3, "") + hpaService, err := hpa.New(mgr.GetClient(), eventRecorder, config.ReplicaReductionFactor, config.MaximumTargetResourceUtilization, 100, time.Hour, 1000, 3, "", config) Expect(err).NotTo(HaveOccurred()) hpaWebhook := New(tortoiseService, hpaService) diff --git a/cmd/main.go b/cmd/main.go index c5124b24..49ac7e96 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -152,7 +152,7 @@ func main() { os.Exit(1) } - hpaService, err := hpa.New(mgr.GetClient(), eventRecorder, config.ReplicaReductionFactor, config.MaximumTargetResourceUtilization, config.HPATargetUtilizationMaxIncrease, config.HPATargetUtilizationUpdateInterval, config.MaximumMinReplicas, config.MaximumMaxReplicas, int32(config.MinimumMinReplicas), config.HPAExternalMetricExclusionRegex) + hpaService, err := hpa.New(mgr.GetClient(), eventRecorder, config.ReplicaReductionFactor, config.MaximumTargetResourceUtilization, config.HPATargetUtilizationMaxIncrease, config.HPATargetUtilizationUpdateInterval, config.MaximumMinReplicas, int32(config.MinimumMinReplicas), config.HPAExternalMetricExclusionRegex, config) if err != nil { setupLog.Error(err, "unable to start hpa service") os.Exit(1) @@ -176,7 +176,7 @@ func main() { config.MinimumMemoryRequestPerContainer, config.MaximumCPURequest, 
config.MaximumMemoryRequest, - config.MaximumMaxReplicas, + config.GetDefaultMaximumMaxReplica(), config.MaxAllowedScalingDownRatio, config.BufferRatioOnVerticalResource, config.FeatureFlags, diff --git a/internal/controller/tortoise_controller_test.go b/internal/controller/tortoise_controller_test.go index 93a9e33d..268ed42b 100644 --- a/internal/controller/tortoise_controller_test.go +++ b/internal/controller/tortoise_controller_test.go @@ -23,6 +23,7 @@ import ( "sigs.k8s.io/yaml" "github.com/mercari/tortoise/api/v1beta3" + configfile "github.com/mercari/tortoise/pkg/config" "github.com/mercari/tortoise/pkg/deployment" "github.com/mercari/tortoise/pkg/features" "github.com/mercari/tortoise/pkg/hpa" @@ -249,7 +250,19 @@ func startController(ctx context.Context) func() { Expect(err).ShouldNot(HaveOccurred()) cli, err := vpa.New(mgr.GetConfig(), recorder) Expect(err).ShouldNot(HaveOccurred()) - hpaS, err := hpa.New(mgr.GetClient(), recorder, 0.95, 90, 25, time.Hour, 1000, 10000, 3, ".*-exclude-metric") + + // Define a dummy config with maximumMaxReplica set to 10000 for the default group + defaultGroupName := "default" + dummyConfig := &configfile.Config{ + MaximumMaxReplicas: []configfile.MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: &defaultGroupName, + MaximumMaxReplica: 10000, // Set the value you need + }, + }, + // Add other default values if your function logic depends on them + } + hpaS, err := hpa.New(mgr.GetClient(), recorder, 0.95, 90, 25, time.Hour, 1000, 3, ".*-exclude-metric", dummyConfig) Expect(err).ShouldNot(HaveOccurred()) reconciler := &TortoiseReconciler{ Scheme: scheme, diff --git a/pkg/config/config.go b/pkg/config/config.go index 7dbf7ae8..891c9be4 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -2,10 +2,12 @@ package config import ( "fmt" + "math" "os" "time" "gopkg.in/yaml.v3" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/mercari/tortoise/pkg/features" ) @@ -166,11 +168,6 @@ type Config struct { // a 
tortoise will ignore `PreferredMaxReplicas`, and increase the number of replicas. // This feature is controlled by the feature flag `VerticalScalingBasedOnPreferredMaxReplicas`. PreferredMaxReplicas int `yaml:"PreferredMaxReplicas"` - // MaximumMaxReplicas is the maximum maxReplica that tortoise can give to the HPA (default: 100) - // Note that this is very dangerous. If you set this value too low, the HPA may not be able to scale up the workload. - // The motivation is to use it has a hard limit to prevent the HPA from scaling up the workload too much in cases of Tortoise's bug, abnormal traffic increase, etc. - // If some Tortoise hits this limit, the tortoise controller emits an error log, which may or may not imply you have to change this value. - MaximumMaxReplicas int32 `yaml:"MaximumMaxReplicas"` // MaximumCPURequest is the maximum CPU cores that the tortoise can give to the container resource request (default: 10) MaximumCPURequest string `yaml:"MaximumCPURequest"` // MaximumMemoryRequest is the maximum memory bytes that the tortoise can give to the container resource request (default: 10Gi) @@ -261,12 +258,43 @@ type Config struct { // IstioSidecarProxyDefaultMemory is the default Memory resource request of the istio sidecar proxy (default: 200Mi) IstioSidecarProxyDefaultMemory string `yaml:"IstioSidecarProxyDefaultMemory"` + // serviceGroups defines a list of service category names. + ServiceGroups []ServiceGroup `yaml:"ServiceGroups"` + // MaximumMaxReplicas is the maximum maxReplicas that tortoise can give to the HPA per group (default: 100) + // Note that this is very dangerous. If you set this value too low, the HPA may not be able to scale up the workload. + // The motivation is to use it has a hard limit to prevent the HPA from scaling up the workload too much in cases of Tortoise's bug, abnormal traffic increase, etc. 
// MaximumMaxReplicasPerGroup pairs a service group name with the hard upper
// bound on HPA maxReplicas that applies to that group.
type MaximumMaxReplicasPerGroup struct {
	// ServiceGroupName refers to one ServiceGroup at Config.ServiceGroups.
	// The reserved name "default" marks the fallback entry; validate requires
	// that one such entry exists, and it does not need a matching ServiceGroup.
	//
	// NOTE(review): a nil name is accepted by validate, but the lookups visible
	// in this change (GetDefaultMaximumMaxReplica and the HPA service's
	// per-group lookup) skip nil names, so such an entry only influences the
	// "minimum of all limits" sanity checks. Confirm whether nil should be
	// rejected or treated as the default.
	ServiceGroupName *string `yaml:"ServiceGroupName"`

	// MaximumMaxReplica is the maxReplicas ceiling for the referenced group.
	MaximumMaxReplica int32 `yaml:"MaximumMaxReplica"`
}

// Namespace represents a Kubernetes namespace and its associated label selectors.
//
// NOTE(review): the group matcher visible in this change compares only Name;
// LabelSelectors are parsed but do not appear to be evaluated anywhere yet.
// NOTE(review): metav1.LabelSelector declares json/protobuf tags but no yaml
// tags, so gopkg.in/yaml.v3 presumably expects lowercased keys such as
// "matchlabels" rather than "matchLabels" — verify with a config fixture.
type Namespace struct {
	Name           string                  `yaml:"name"`           // Namespace name, compared verbatim against the Tortoise's namespace
	LabelSelectors []*metav1.LabelSelector `yaml:"labelSelectors"` // Label selectors declared for this namespace
}

// ServiceGroup represents a named collection of services, identified by the
// namespaces they live in.
type ServiceGroup struct {
	// Name is the group's name (e.g., big-service, fintech-service, etc).
	// validate rejects duplicate names, and every non-"default"
	// MaximumMaxReplicasPerGroup.ServiceGroupName must equal one of them.
	Name string `yaml:"name"`
	// Namespaces lists the namespaces (with their label selectors) that belong
	// to this group.
	Namespaces []Namespace `yaml:"namespaces"` // A slice of Namespace structs
}
+func (cfg *Config) GetDefaultMaximumMaxReplica() int32 { + for _, maxReplicaGroup := range cfg.MaximumMaxReplicas { + if maxReplicaGroup.ServiceGroupName != nil && *maxReplicaGroup.ServiceGroupName == "default" { + return maxReplicaGroup.MaximumMaxReplica + } + } + return 100 // Choose a last resort default value if "default" is not found +} + func validate(config *Config) error { if config.RangeOfMinMaxReplicasRecommendationHours > 24 || config.RangeOfMinMaxReplicasRecommendationHours < 1 { return fmt.Errorf("RangeOfMinMaxReplicasRecommendationHours should be between 1 and 24") @@ -339,10 +386,67 @@ func validate(config *Config) error { if config.MinimumMinReplicas >= int(config.MaximumMinReplicas) { return fmt.Errorf("MinimumMinReplicas should be less than MaximumMinReplicas") } - if config.MaximumMinReplicas > config.MaximumMaxReplicas { + + // Check that there is at least one MaximumMaxReplicas + if len(config.MaximumMaxReplicas) == 0 { + return fmt.Errorf("MaximumMaxReplicas must have at least one configuration entry") + } + + // Find the minimum value of MaximumMaxReplicas across all service groups + minOfMaximumMaxReplicas := int32(math.MaxInt32) // Start with the largest possible int32 value + var defaultFound bool + for _, group := range config.MaximumMaxReplicas { + if group.MaximumMaxReplica < minOfMaximumMaxReplicas { + minOfMaximumMaxReplicas = group.MaximumMaxReplica + } + // Check for "default" entry + if group.ServiceGroupName != nil && *group.ServiceGroupName == "default" { + defaultFound = true + } + } + + // Ensure that there is an entry with "default" for MaximumMaxReplicas + if !defaultFound { + return fmt.Errorf("There must be at least one MaximumMaxReplicas entry with ServiceGroupName set to \"default\"") + } + + // Check for non-negative values + if minOfMaximumMaxReplicas < 0 { + return fmt.Errorf("MaximumMaxReplicas should contain non-negative values") + } + + // Ensure ServiceGroupNames in MaximumMaxReplicas match defined ServiceGroups + 
serviceGroupMap := make(map[string]bool) + for _, sg := range config.ServiceGroups { + serviceGroupMap[sg.Name] = true + } + + for _, maxReplicas := range config.MaximumMaxReplicas { + if maxReplicas.ServiceGroupName != nil { + // Allow "default" to bypass the check of matching ServiceGroups. + if *maxReplicas.ServiceGroupName == "default" { + continue + } + // If not default, ensure it exists in the serviceGroupMap. + if _, exists := serviceGroupMap[*maxReplicas.ServiceGroupName]; !exists { + return fmt.Errorf("ServiceGroupName %s in MaximumMaxReplicas is not defined in ServiceGroups", *maxReplicas.ServiceGroupName) + } + } + } + + // Ensure no duplicates in ServiceGroups + seenServiceGroups := make(map[string]bool) + for _, sg := range config.ServiceGroups { + if seenServiceGroups[sg.Name] { + return fmt.Errorf("Duplicate ServiceGroupName found: %s", sg.Name) + } + seenServiceGroups[sg.Name] = true + } + + if config.MaximumMinReplicas > minOfMaximumMaxReplicas { return fmt.Errorf("MaximumMinReplicas should be less than or equal to MaximumMaxReplicas") } - if config.PreferredMaxReplicas >= int(config.MaximumMaxReplicas) { + if config.PreferredMaxReplicas >= int(minOfMaximumMaxReplicas) { return fmt.Errorf("PreferredMaxReplicas should be less than MaximumMaxReplicas") } if config.PreferredMaxReplicas <= config.MinimumMinReplicas { diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 5d4f0e39..3f804a3c 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -7,6 +7,7 @@ import ( ) func TestParseConfig(t *testing.T) { + defaultGroupName := "default" type args struct { path string } @@ -40,11 +41,17 @@ func TestParseConfig(t *testing.T) { TortoiseUpdateInterval: 1 * time.Hour, HPATargetUtilizationMaxIncrease: 10, MaximumMinReplicas: 10, - MaximumMaxReplicas: 100, - HPATargetUtilizationUpdateInterval: 3 * time.Hour, - IstioSidecarProxyDefaultCPU: "100m", - IstioSidecarProxyDefaultMemory: "200Mi", - MaxAllowedScalingDownRatio: 0.5, 
+ ServiceGroups: []ServiceGroup{}, + MaximumMaxReplicas: []MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: &defaultGroupName, + MaximumMaxReplica: 100, + }, + }, + HPATargetUtilizationUpdateInterval: 3 * time.Hour, + IstioSidecarProxyDefaultCPU: "100m", + IstioSidecarProxyDefaultMemory: "200Mi", + MaxAllowedScalingDownRatio: 0.5, MinimumCPURequestPerContainer: map[string]string{ "istio-proxy": "100m", "hoge-agent": "120m", @@ -84,15 +91,21 @@ func TestParseConfig(t *testing.T) { TortoiseUpdateInterval: 15 * time.Second, HPATargetUtilizationMaxIncrease: 5, MaximumMinReplicas: 10, - MaximumMaxReplicas: 100, - HPATargetUtilizationUpdateInterval: 24 * time.Hour, - IstioSidecarProxyDefaultCPU: "100m", - IstioSidecarProxyDefaultMemory: "200Mi", - MaxAllowedScalingDownRatio: 0.8, - MinimumCPURequestPerContainer: map[string]string{}, - MinimumMemoryRequestPerContainer: map[string]string{}, - ResourceLimitMultiplier: map[string]int64{}, - BufferRatioOnVerticalResource: 0.1, + ServiceGroups: []ServiceGroup{}, + MaximumMaxReplicas: []MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: &defaultGroupName, + MaximumMaxReplica: 100, + }, + }, + HPATargetUtilizationUpdateInterval: 24 * time.Hour, + IstioSidecarProxyDefaultCPU: "100m", + IstioSidecarProxyDefaultMemory: "200Mi", + MaxAllowedScalingDownRatio: 0.8, + MinimumCPURequestPerContainer: map[string]string{}, + MinimumMemoryRequestPerContainer: map[string]string{}, + ResourceLimitMultiplier: map[string]int64{}, + BufferRatioOnVerticalResource: 0.1, }, }, { @@ -126,15 +139,21 @@ func TestParseConfig(t *testing.T) { TortoiseUpdateInterval: 15 * time.Second, HPATargetUtilizationMaxIncrease: 5, MaximumMinReplicas: 10, - MaximumMaxReplicas: 100, - HPATargetUtilizationUpdateInterval: 24 * time.Hour, - IstioSidecarProxyDefaultCPU: "100m", - IstioSidecarProxyDefaultMemory: "200Mi", - MaxAllowedScalingDownRatio: 0.8, - MinimumCPURequestPerContainer: map[string]string{}, - MinimumMemoryRequestPerContainer: map[string]string{}, - 
ResourceLimitMultiplier: map[string]int64{}, - BufferRatioOnVerticalResource: 0.1, + ServiceGroups: []ServiceGroup{}, + MaximumMaxReplicas: []MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: &defaultGroupName, + MaximumMaxReplica: 100, + }, + }, + HPATargetUtilizationUpdateInterval: 24 * time.Hour, + IstioSidecarProxyDefaultCPU: "100m", + IstioSidecarProxyDefaultMemory: "200Mi", + MaxAllowedScalingDownRatio: 0.8, + MinimumCPURequestPerContainer: map[string]string{}, + MinimumMemoryRequestPerContainer: map[string]string{}, + ResourceLimitMultiplier: map[string]int64{}, + BufferRatioOnVerticalResource: 0.1, }, }, } @@ -153,6 +172,7 @@ func TestParseConfig(t *testing.T) { } func Test_validate(t *testing.T) { + defaultGroupName := "default" tests := []struct { name string config *Config @@ -205,7 +225,12 @@ func Test_validate(t *testing.T) { HPATargetUtilizationMaxIncrease: 99, MinimumMinReplicas: 2, MaximumMinReplicas: 20, - MaximumMaxReplicas: 10, + MaximumMaxReplicas: []MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: &defaultGroupName, + MaximumMaxReplica: 10, + }, + }, }, wantErr: true, }, @@ -217,21 +242,31 @@ func Test_validate(t *testing.T) { HPATargetUtilizationMaxIncrease: 99, MinimumMinReplicas: 2, MaximumMinReplicas: 20, - MaximumMaxReplicas: 100, - PreferredMaxReplicas: 101, + MaximumMaxReplicas: []MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: &defaultGroupName, + MaximumMaxReplica: 100, + }, + }, + PreferredMaxReplicas: 101, }, wantErr: true, }, { - name: "invalid PreferredMaxReplicas", + name: "invalid PreferredMaxReplicas less than minimum", config: &Config{ RangeOfMinMaxReplicasRecommendationHours: 2, GatheringDataPeriodType: "daily", HPATargetUtilizationMaxIncrease: 99, MinimumMinReplicas: 5, MaximumMinReplicas: 20, - MaximumMaxReplicas: 100, - PreferredMaxReplicas: 4, + MaximumMaxReplicas: []MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: &defaultGroupName, + MaximumMaxReplica: 100, + }, + }, + PreferredMaxReplicas: 4, }, wantErr: 
true, }, @@ -243,9 +278,15 @@ func Test_validate(t *testing.T) { HPATargetUtilizationMaxIncrease: 99, MinimumMinReplicas: 5, MaximumMinReplicas: 20, - MaximumMaxReplicas: 100, - PreferredMaxReplicas: 6, - MaxAllowedScalingDownRatio: 1.1, + ServiceGroups: []ServiceGroup{}, + MaximumMaxReplicas: []MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: &defaultGroupName, + MaximumMaxReplica: 100, + }, + }, + PreferredMaxReplicas: 6, + MaxAllowedScalingDownRatio: 1.1, }, wantErr: true, }, diff --git a/pkg/hpa/service.go b/pkg/hpa/service.go index 444f98da..7c4487bb 100644 --- a/pkg/hpa/service.go +++ b/pkg/hpa/service.go @@ -24,6 +24,7 @@ import ( "github.com/mercari/tortoise/api/v1beta3" autoscalingv1beta3 "github.com/mercari/tortoise/api/v1beta3" + "github.com/mercari/tortoise/pkg/config" "github.com/mercari/tortoise/pkg/event" "github.com/mercari/tortoise/pkg/metrics" "github.com/mercari/tortoise/pkg/utils" @@ -39,8 +40,8 @@ type Service struct { tortoiseHPATargetUtilizationUpdateInterval time.Duration minimumMinReplicas int32 maximumMinReplica int32 - maximumMaxReplica int32 externalMetricExclusionRegex *regexp.Regexp + config *config.Config } func New( @@ -50,9 +51,10 @@ func New( maximumTargetResourceUtilization, tortoiseHPATargetUtilizationMaxIncrease int, tortoiseHPATargetUtilizationUpdateInterval time.Duration, - maximumMinReplica, maximumMaxReplica int32, + maximumMinReplica, minimumMinReplicas int32, externalMetricExclusionRegex string, + config *config.Config, ) (*Service, error) { var regex *regexp.Regexp if externalMetricExclusionRegex != "" { @@ -73,8 +75,8 @@ func New( tortoiseHPATargetUtilizationUpdateInterval: tortoiseHPATargetUtilizationUpdateInterval, maximumMinReplica: maximumMinReplica, minimumMinReplicas: minimumMinReplicas, - maximumMaxReplica: maximumMaxReplica, externalMetricExclusionRegex: regex, + config: config, }, nil } @@ -239,6 +241,21 @@ var globalRecommendedHPABehavior = &v2.HorizontalPodAutoscalerBehavior{ }, } +// Determine which 
service group is applicable for a given Tortoise using its namespace. +func determineServiceGroup(tortoise *autoscalingv1beta3.Tortoise, cfg *config.Config) string { + tortoiseNamespace := tortoise.Namespace + for _, serviceGroup := range cfg.ServiceGroups { + for _, namespace := range serviceGroup.Namespaces { + if namespace.Name == tortoiseNamespace { + klog.InfoS("Namespace matched", "serviceGroup", serviceGroup.Name, "namespace", tortoiseNamespace) + return serviceGroup.Name + } + } + } + // Returning an empty string to denote a default value when no match. + return "" +} + func (c *Service) CreateHPA(ctx context.Context, tortoise *autoscalingv1beta3.Tortoise, replicaNum int32, now time.Time) (*v2.HorizontalPodAutoscaler, *autoscalingv1beta3.Tortoise, error) { if !HasHorizontal(tortoise) { // no need to create HPA @@ -249,6 +266,10 @@ func (c *Service) CreateHPA(ctx context.Context, tortoise *autoscalingv1beta3.To return nil, tortoise, nil } + // Logic to determine the applicable service group and max replicas. + groupName := determineServiceGroup(tortoise, c.config) + maximumMaxReplicas := c.getMaximumMaxReplicasForGroup(groupName) + hpa := &v2.HorizontalPodAutoscaler{ ObjectMeta: metav1.ObjectMeta{ Name: autoscalingv1beta3.TortoiseDefaultHPAName(tortoise.Name), @@ -261,7 +282,7 @@ func (c *Service) CreateHPA(ctx context.Context, tortoise *autoscalingv1beta3.To APIVersion: tortoise.Spec.TargetRefs.ScaleTargetRef.APIVersion, }, MinReplicas: ptr.To[int32](c.minimumMinReplicas), - MaxReplicas: c.maximumMaxReplica, + MaxReplicas: maximumMaxReplicas, Behavior: globalRecommendedHPABehavior, }, } @@ -274,6 +295,25 @@ func (c *Service) CreateHPA(ctx context.Context, tortoise *autoscalingv1beta3.To return hpa.DeepCopy(), tortoise, err } +// getMaximumMaxReplicasForGroup returns the maximum replicas for a specific service group. 
+// If the groupName is empty or doesn't match any named group, the function returns the default maximum replicas defined by the group with ServiceGroupName set to "default". +func (c *Service) getMaximumMaxReplicasForGroup(groupName string) int32 { + // Handle the case for the default group first + if groupName == "" { + return c.config.GetDefaultMaximumMaxReplica() + } + + // Look for a specific service group match + for _, group := range c.config.MaximumMaxReplicas { + if group.ServiceGroupName != nil && *group.ServiceGroupName == groupName { + return group.MaximumMaxReplica + } + } + + // Fallback to the default maximum if no match is found + return c.config.GetDefaultMaximumMaxReplica() +} + func (c *Service) GetHPAOnTortoiseSpec(ctx context.Context, tortoise *autoscalingv1beta3.Tortoise) (*v2.HorizontalPodAutoscaler, error) { if tortoise.Spec.TargetRefs.HorizontalPodAutoscalerName == nil { return nil, nil @@ -400,9 +440,13 @@ func (c *Service) ChangeHPAFromTortoiseRecommendation(tortoise *autoscalingv1bet recommendMax = *tortoise.Spec.MaxReplicas } - if recommendMax > c.maximumMaxReplica { - c.recorder.Event(tortoise, corev1.EventTypeWarning, event.WarningHittingHardMaxReplicaLimit, fmt.Sprintf("MaxReplica (%v) suggested from Tortoise (%s/%s) hits a cluster-wide maximum replica number (%v). 
It wouldn't be a problem until the replica number actually grows to %v though, you may want to reach out to your cluster admin.", recommendMax, tortoise.Namespace, tortoise.Name, c.maximumMaxReplica, c.maximumMaxReplica)) - recommendMax = c.maximumMaxReplica + // Determine the service group and the maximum replicas for that group + groupName := determineServiceGroup(tortoise, c.config) + maximumMaxReplica := c.getMaximumMaxReplicasForGroup(groupName) + + if recommendMax > maximumMaxReplica { + c.recorder.Event(tortoise, corev1.EventTypeWarning, event.WarningHittingHardMaxReplicaLimit, fmt.Sprintf("MaxReplica (%v) suggested from Tortoise (%s/%s) hits a cluster-wide maximum replica number (%v). It wouldn't be a problem until the replica number actually grows to %v though, you may want to reach out to your cluster admin.", recommendMax, tortoise.Namespace, tortoise.Name, maximumMaxReplica, maximumMaxReplica)) + recommendMax = maximumMaxReplica } hpa.Spec.MaxReplicas = recommendMax diff --git a/pkg/hpa/service_test.go b/pkg/hpa/service_test.go index 4891613f..4eefea89 100644 --- a/pkg/hpa/service_test.go +++ b/pkg/hpa/service_test.go @@ -17,6 +17,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client/fake" "github.com/mercari/tortoise/api/v1beta3" + "github.com/mercari/tortoise/pkg/config" ) func TestClient_UpdateHPAFromTortoiseRecommendation(t *testing.T) { @@ -2749,7 +2750,17 @@ func TestClient_UpdateHPAFromTortoiseRecommendation(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - c, err := New(fake.NewClientBuilder().WithRuntimeObjects(tt.initialHPA).Build(), record.NewFakeRecorder(10), 0.95, 90, 50, time.Hour, 1000, 10001, 3, tt.excludeMetricRegex) + // Define a dummy config with maximumMaxReplica set to 10001 for the default group + defaultGroupName := "default" + dummyConfig := &config.Config{ + MaximumMaxReplicas: []config.MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: &defaultGroupName, + MaximumMaxReplica: 10001, + }, + }, + 
} + c, err := New(fake.NewClientBuilder().WithRuntimeObjects(tt.initialHPA).Build(), record.NewFakeRecorder(10), 0.95, 90, 50, time.Hour, 1000, 3, tt.excludeMetricRegex, dummyConfig) if err != nil { t.Fatalf("New() error = %v", err) } @@ -3062,12 +3073,23 @@ func TestService_InitializeHPA(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - c, err := New(fake.NewClientBuilder().Build(), record.NewFakeRecorder(10), 0.95, 90, 100, time.Hour, 100, 1000, 3, "") + // Define a dummy config with maximumMaxReplica set to 1000 for the default group + defaultGroupName := "default" + dummyConfig := &config.Config{ + MaximumMaxReplicas: []config.MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: &defaultGroupName, + MaximumMaxReplica: 1000, // Set the value you need + }, + }, + // Add other default values if your function logic depends on them + } + c, err := New(fake.NewClientBuilder().Build(), record.NewFakeRecorder(10), 0.95, 90, 100, time.Hour, 100, 3, "", dummyConfig) if err != nil { t.Fatalf("New() error = %v", err) } if tt.initialHPA != nil { - c, err = New(fake.NewClientBuilder().WithRuntimeObjects(tt.initialHPA).Build(), record.NewFakeRecorder(10), 0.95, 90, 100, time.Hour, 100, 1000, 3, "") + c, err = New(fake.NewClientBuilder().WithRuntimeObjects(tt.initialHPA).Build(), record.NewFakeRecorder(10), 0.95, 90, 100, time.Hour, 100, 3, "", dummyConfig) if err != nil { t.Fatalf("New() error = %v", err) } @@ -4620,12 +4642,23 @@ func TestService_UpdateHPASpecFromTortoiseAutoscalingPolicy(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - c, err := New(fake.NewClientBuilder().Build(), record.NewFakeRecorder(10), 0.95, 90, 100, time.Hour, 1000, 10000, 3, "") + // Define a dummy config with maximumMaxReplica set to 10000 for the default group + defaultGroupName := "default" + dummyConfig := &config.Config{ + MaximumMaxReplicas: []config.MaximumMaxReplicasPerGroup{ + { + ServiceGroupName: &defaultGroupName, + 
MaximumMaxReplica: 10000, // Set the value you need + }, + }, + // Add other default values if your function logic depends on them + } + c, err := New(fake.NewClientBuilder().Build(), record.NewFakeRecorder(10), 0.95, 90, 100, time.Hour, 1000, 3, "", dummyConfig) if err != nil { t.Fatalf("New() error = %v", err) } if tt.initialHPA != nil { - c, err = New(fake.NewClientBuilder().WithRuntimeObjects(tt.initialHPA).Build(), record.NewFakeRecorder(10), 0.95, 90, 100, time.Hour, 1000, 10000, 3, "") + c, err = New(fake.NewClientBuilder().WithRuntimeObjects(tt.initialHPA).Build(), record.NewFakeRecorder(10), 0.95, 90, 100, time.Hour, 1000, 3, "", dummyConfig) if err != nil { t.Fatalf("New() error = %v", err) }