Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: add custom egress rules to docker-autoscaler security group #1222

Open
wants to merge 34 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
bfa1b36
Add custom egress rules to docker-autoscaler workers security group. …
ikarlashov Jan 6, 2025
75a0114
Fix typo and formatting
ikarlashov Jan 10, 2025
5453ca1
Move inline docker-autoscaler sg rules to standalone tf resources
ikarlashov Jan 10, 2025
5b4ace4
Fix var declaration
ikarlashov Jan 10, 2025
10bf078
Merge branch 'main' into custom_sg_docker_autoscaler
ikarlashov Jan 10, 2025
2e49a4a
Add missing security group ingress rule to allow traffic within Runne…
ikarlashov Jan 10, 2025
a98a92f
Add sg rule to allow all Egress traffic between runner manager and do…
ikarlashov Jan 10, 2025
dc96e87
Set clear naming for extra rule
ikarlashov Jan 10, 2025
7eceec7
Fix formatting
ikarlashov Jan 10, 2025
6339c38
Test
ikarlashov Jan 10, 2025
f52001e
Add comment
ikarlashov Jan 10, 2025
723408f
Make protocol and description definitions required for docker-autosca…
ikarlashov Jan 16, 2025
129ecc0
Fix default value in condition for sg rules
ikarlashov Jan 16, 2025
e77f822
Change type of docker-autoscaler sg rules vars
ikarlashov Jan 16, 2025
b063255
Change validation for protocol
ikarlashov Jan 16, 2025
474d27b
Fix typo
ikarlashov Jan 16, 2025
dc15ee6
Change type to map
ikarlashov Jan 16, 2025
ec75394
Fix for_each
ikarlashov Jan 16, 2025
6916df3
Changed var names
ikarlashov Jan 16, 2025
3dee3a1
convert idx to string for for_each
ikarlashov Jan 16, 2025
e14c312
One more time
ikarlashov Jan 16, 2025
13bea38
Change sg rules type to map of objects due to conflict with extra ing…
ikarlashov Jan 16, 2025
0522d0c
Fix docker_autoscaler_internal_traffic
ikarlashov Jan 16, 2025
402fe76
Remove default for runner_worker_docker_autoscaler_ingress_rules
ikarlashov Jan 16, 2025
2b6b53b
fix typo
ikarlashov Jan 16, 2025
47aa0e7
Allow egress traffic from runner-manager to docker-autoscaler workers
ikarlashov Jan 20, 2025
4d9bf5b
Refactor runner-manager security group spec and rules
ikarlashov Jan 23, 2025
a1e891f
Fix SG rules resource spec
ikarlashov Jan 23, 2025
10f2f7f
Fix issues
ikarlashov Jan 23, 2025
8d95854
Remove typo resource
ikarlashov Jan 23, 2025
d6158d5
Fix typo
ikarlashov Jan 23, 2025
956c19b
Fix typo2
ikarlashov Jan 23, 2025
a5bd46d
Fix cspell.json
ikarlashov Jan 24, 2025
0e4fb8e
Merge branch 'main' into custom_sg_docker_autoscaler
ikarlashov Jan 26, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 45 additions & 31 deletions docker_autoscaler.tf
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,16 @@
# outdated docker+machine driver. The docker+machine driver is a legacy driver that is no longer maintained by GitLab.
#

resource "aws_security_group" "docker_autoscaler" {
count = var.runner_worker.type == "docker-autoscaler" ? 1 : 0
########################################
###### Security Group and SG rules #####
########################################

description = "Docker autoscaler security group"
# Base security group
resource "aws_security_group" "docker_autoscaler" {
count = var.runner_worker.type == "docker-autoscaler" ? 1 : 0
name_prefix = "${local.name_sg}-docker-autoscaler"
vpc_id = var.vpc_id
name = "${local.name_sg}-docker-autoscaler"
description = "Docker-autoscaler security group"

tags = merge(
local.tags,
Expand All @@ -18,40 +22,50 @@ resource "aws_security_group" "docker_autoscaler" {
)
}

resource "aws_security_group_rule" "autoscaler_egress" {
count = var.runner_worker.type == "docker-autoscaler" ? 1 : 0
# Egress rules
resource "aws_vpc_security_group_egress_rule" "docker_autoscaler" {
for_each = var.runner_worker.type == "docker-autoscaler" ? var.runner_worker_docker_autoscaler_egress_rules : {}

security_group_id = aws_security_group.docker_autoscaler[0].id

from_port = each.value.from_port
to_port = each.value.to_port
ip_protocol = each.value.protocol

description = "All egress traffic docker autoscaler"
type = "egress"
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
security_group_id = join("", aws_security_group.docker_autoscaler[*].id)
description = each.value.description
prefix_list_id = each.value.prefix_list_id
referenced_security_group_id = each.value.security_group
cidr_ipv4 = each.value.cidr_block
cidr_ipv6 = each.value.ipv6_cidr_block
}

resource "aws_security_group_rule" "autoscaler_ingress" {
count = var.runner_worker.type == "docker-autoscaler" ? 1 : 0
# Ingress rules
resource "aws_vpc_security_group_ingress_rule" "docker_autoscaler" {
for_each = var.runner_worker.type == "docker-autoscaler" ? var.runner_worker_docker_autoscaler_ingress_rules : {}

security_group_id = aws_security_group.docker_autoscaler[0].id

from_port = each.value.from_port
to_port = each.value.to_port
ip_protocol = each.value.protocol

description = "All ingress traffic from runner security group"
type = "ingress"
from_port = 0
to_port = 0
protocol = "-1"
source_security_group_id = aws_security_group.runner.id
security_group_id = join("", aws_security_group.docker_autoscaler[*].id)
description = each.value.description
prefix_list_id = each.value.prefix_list_id
referenced_security_group_id = each.value.security_group

cidr_ipv4 = each.value.cidr_block
cidr_ipv6 = each.value.ipv6_cidr_block
}

resource "aws_security_group_rule" "extra_autoscaler_ingress" {
count = var.runner_worker.type == "docker-autoscaler" ? length(var.runner_worker_docker_autoscaler_asg.sg_ingresses) : 0
resource "aws_vpc_security_group_ingress_rule" "autoscaler_ingress" {
count = var.runner_worker.type == "docker-autoscaler" ? 1 : 0

description = var.runner_worker_docker_autoscaler_asg.sg_ingresses[count.index].description
type = "ingress"
from_port = var.runner_worker_docker_autoscaler_asg.sg_ingresses[count.index].from_port
to_port = var.runner_worker_docker_autoscaler_asg.sg_ingresses[count.index].to_port
protocol = var.runner_worker_docker_autoscaler_asg.sg_ingresses[count.index].protocol
cidr_blocks = var.runner_worker_docker_autoscaler_asg.sg_ingresses[count.index].cidr_blocks
security_group_id = join("", aws_security_group.docker_autoscaler[*].id)
security_group_id = aws_security_group.docker_autoscaler[0].id
from_port = 0
to_port = 0
ip_protocol = "-1"
description = "Allow ALL Ingress traffic between Runner Manager and Docker-autoscaler workers security group"
referenced_security_group_id = aws_security_group.runner.id
}

####################################
Expand Down
14 changes: 13 additions & 1 deletion security_groups.tf
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,18 @@ resource "aws_security_group" "runner" {
)
}

# Uncomment the following block after migrating the aws_security_group.runner SG rules into aws_vpc_security_group_*_rule resources
# resource "aws_vpc_security_group_egress_rule" "runner_manager_to_docker_autoscaler_egress" {
# count = var.runner_worker.type == "docker-autoscaler" ? 1 : 0

# security_group_id = aws_security_group.runner.id
# from_port = 0
# to_port = 0
# ip_protocol = "-1"
# description = "Allow ALL Egress traffic between Runner Manager and Docker-autoscaler workers security group"
# referenced_security_group_id = aws_security_group.docker_autoscaler[0].id
# }

########################################
## Security group IDs to runner agent ##
########################################
Expand Down Expand Up @@ -66,7 +78,7 @@ resource "aws_security_group_rule" "runner_ping_group" {

resource "aws_security_group" "docker_machine" {
# checkov:skip=CKV2_AWS_5:Security group is used within an template and assigned to the docker machines
count = contains(["docker+machine", "docker-autoscaler"], var.runner_worker.type) ? 1 : 0
count = var.runner_worker.type == "docker+machine" ? 1 : 0
kayman-mk marked this conversation as resolved.
Show resolved Hide resolved

name_prefix = "${local.name_sg}-docker-machine"
vpc_id = var.vpc_id
Expand Down
125 changes: 103 additions & 22 deletions variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -428,11 +428,11 @@ variable "runner_worker_cache" {
cache. To use the same cache across multiple Runner Worker disable the creation of the cache and provide a policy and
bucket name. See the public runner example for more details."

For detailed documentation check https://docs.gitlab.com/runner/configuration/advanced-configuration.html#the-runnerscaches3-section
For detailed documentation check https://docs.gitlab.com/runner/configuration/advanced-configuration.html#the-runnerscaches3-section.

access_log_bucker_id = The ID of the bucket where the access logs are stored.
access_log_bucket_prefix = The bucket prefix for the access logs.
authentication_type = A string that declares the AuthenticationType for [runners.cache.s3]. Can either be 'iam' or 'credentials'
authentication_type = A string that declares the AuthenticationType for [runners.cache.s3]. Can either be 'iam' or 'credentials'.
bucket = Name of the cache bucket. Requires `create = false`.
bucket_prefix = Prefix for s3 cache bucket name. Requires `create = true`.
create = Boolean used to enable or disable the creation of the cache bucket.
Expand Down Expand Up @@ -609,11 +609,11 @@ variable "runner_worker_docker_machine_fleet" {

variable "runner_worker_docker_autoscaler" {
description = <<-EOT
fleeting_plugin_version = The version of aws fleeting plugin
connector_config_user = User to connect to worker machine
fleeting_plugin_version = The version of aws fleeting plugin.
connector_config_user = User to connect to worker machine.
key_pair_name = The name of the key pair used by the Runner to connect to the docker-machine Runner Workers. This variable is only supported when `enables` is set to `true`.
capacity_per_instance = The number of jobs that can be executed concurrently by a single instance.
max_use_count = Max job number that can run on a worker
max_use_count = Max job number that can run on a worker.
update_interval = The interval to check with the fleeting plugin for instance updates.
update_interval_when_expecting = The interval to check with the fleeting plugin for instance updates when expecting a state change.
instance_ready_command = Executes this command on each instance provisioned by the autoscaler to ensure that it is ready for use. A failure results in the instance being removed.
Expand All @@ -634,14 +634,14 @@ variable "runner_worker_docker_autoscaler" {
variable "runner_worker_docker_autoscaler_instance" {
description = <<-EOT
ebs_optimized = Enable EBS optimization for the Runner Worker.
http_tokens = Whether or not the metadata service requires session tokens
http_tokens = Whether or not the metadata service requires session tokens.
http_put_response_hop_limit = The desired HTTP PUT response hop limit for instance metadata requests. The larger the number, the further instance metadata requests can travel.
monitoring = Enable detailed monitoring for the Runner Worker.
private_address_only = Restrict Runner Worker to the use of a private IP address. If `runner_instance.use_private_address_only` is set to `true` (default),
root_device_name = The name of the root volume for the Runner Worker.
root_size = The size of the root volume for the Runner Worker.
start_script = Cloud-init user data that will be passed to the Runner Worker. Should not be base64 encrypted.
volume_type = The type of volume to use for the Runner Worker. `gp2`, `gp3`, `io1` or `io2` are supported
volume_type = The type of volume to use for the Runner Worker. `gp2`, `gp3`, `io1` or `io2` are supported.
volume_iops = Guaranteed IOPS for the volume. Only supported when using `gp3`, `io1` or `io2` as `volume_type`.
volume_throughput = Throughput in MB/s for the volume. Only supported when using `gp3` as `volume_type`.
EOT
Expand All @@ -664,25 +664,25 @@ EOT

variable "runner_worker_docker_autoscaler_asg" {
description = <<-EOT
enabled_metrics = List of metrics to collect.
enable_mixed_instances_policy = Make use of autoscaling-group mixed_instances_policy capacities to leverage pools and spot instances.
health_check_grace_period = Time (in seconds) after instance comes into service before checking health
health_check_type = Controls how health checking is done. Values are - EC2 and ELB
health_check_grace_period = Time (in seconds) after instance comes into service before checking health.
health_check_type = Controls how health checking is done. Values are - EC2 and ELB.
instance_refresh_min_healthy_percentage = The amount of capacity in the Auto Scaling group that must remain healthy during an instance refresh to allow the operation to continue, as a percentage of the desired capacity of the Auto Scaling group.
instance_refresh_triggers = Set of additional property names that will trigger an Instance Refresh. A refresh will always be triggered by a change in any of launch_configuration, launch_template, or mixed_instances_policy.
max_growth_rate = The maximum number of machines that can be added to the runner in parallel.
on_demand_base_capacity = Absolute minimum amount of desired capacity that must be fulfilled by on-demand instances.
on_demand_percentage_above_base_capacity = Percentage split between on-demand and Spot instances above the base on-demand capacity.
override_instance_types = List to override the instance type in the Launch Template. Allow to spread spot instances on several types, to reduce interruptions
override_instance_types = List to override the instance type in the Launch Template. Allow to spread spot instances on several types, to reduce interruptions.
profile_name = profile_name = Name of the IAM profile to attach to the Runner Workers.
sg_ingresses = Extra security group rule for workers
spot_allocation_strategy = How to allocate capacity across the Spot pools. 'lowest-price' to optimize cost, 'capacity-optimized' to reduce interruptions
spot_allocation_strategy = How to allocate capacity across the Spot pools. 'lowest-price' to optimize cost, 'capacity-optimized' to reduce interruptions.
spot_instance_pools = Number of Spot pools per availability zone to allocate capacity. EC2 Auto Scaling selects the cheapest Spot pools and evenly allocates Spot capacity across the number of Spot pools that you specify.
subnet_ids = The list of subnet IDs to use for the Runner Worker when the fleet mode is enabled.
types = The type of instance to use for the Runner Worker. In case of fleet mode, multiple instance types are supported.
upgrade_strategy = Auto deploy new instances when launch template changes. Can be either 'bluegreen', 'rolling' or 'off'
enabled_metrics = List of metrics to collect.
upgrade_strategy = Auto deploy new instances when launch template changes. Can be either 'bluegreen', 'rolling' or 'off'.
EOT
type = object({
enabled_metrics = optional(list(string), [])
enable_mixed_instances_policy = optional(bool, false)
health_check_grace_period = optional(number, 300)
health_check_type = optional(string, "EC2")
Expand All @@ -697,14 +697,6 @@ variable "runner_worker_docker_autoscaler_asg" {
subnet_ids = optional(list(string), [])
types = optional(list(string), ["m5.large"])
upgrade_strategy = optional(string, "rolling")
enabled_metrics = optional(list(string), [])
sg_ingresses = optional(list(object({
description = string
from_port = number
to_port = number
protocol = string
cidr_blocks = list(string)
})), [])
})
default = {}
}
Expand Down Expand Up @@ -741,6 +733,95 @@ variable "runner_worker_docker_autoscaler_role" {
default = {}
}

variable "runner_worker_docker_autoscaler_ingress_rules" {
description = "List of ingress rules for the Docker-autoscaler Runner workers"
type = list(object({
cidr_block = string
ipv6_cidr_block = string
prefix_list_id = string
from_port = number
protocol = string # Will be converted to ip_protocol
security_group = string
to_port = number
description = string
}))
kayman-mk marked this conversation as resolved.
Show resolved Hide resolved
default = [] # Empty map as per original empty list default

# Every ingress rule must name one of the protocols listed in the error message.
# The variable is declared as list(object(...)), so the rules are iterated directly
# (values() only accepts maps/objects). The pattern is an anchored alternation, so
# the whole string has to equal one of the allowed protocols; a character class
# would only test a single character.
validation {
  condition = alltrue([
    for rule in var.runner_worker_docker_autoscaler_ingress_rules :
    can(regex("^(-1|tcp|udp|icmp|icmpv6)$", rule.protocol))
  ])
  error_message = "Protocol must be '-1', 'tcp', 'udp', 'icmp', or 'icmpv6'."
}

# Every ingress rule must identify where traffic may come from: at least one of
# cidr_block, ipv6_cidr_block, prefix_list_id or security_group has to be non-null.
# The variable is declared as list(object(...)), so the rules are iterated directly
# (values() only accepts maps/objects).
validation {
  condition = alltrue([
    for rule in var.runner_worker_docker_autoscaler_ingress_rules :
    (rule.cidr_block != null) ||
    (rule.ipv6_cidr_block != null) ||
    (rule.prefix_list_id != null) ||
    (rule.security_group != null)
  ])
  error_message = "At least one source (cidr_block, ipv6_cidr_block, prefix_list_id, or security_group) must be specified for each rule."
}
}

# Egress rules attached to the security group of the docker-autoscaler Runner Workers.
#
# Each rule carries a port range, a protocol and a target. The target is given through
# cidr_block, ipv6_cidr_block, prefix_list_id or security_group; the validation below
# requires at least one of them to be set. The default permits outbound HTTPS (tcp/443)
# to any IPv4 and any IPv6 address.
variable "runner_worker_docker_autoscaler_egress_rules" {
  description = "List of egress rules for the Docker-autoscaler Runner workers"
  type = list(object({
    cidr_block      = optional(string, null)
    ipv6_cidr_block = optional(string, null)
    prefix_list_id  = optional(string, null)
    from_port       = number
    protocol        = string
    security_group  = optional(string, null)
    to_port         = number
    description     = optional(string, null)
  }))
  default = [
    {
      cidr_block      = "0.0.0.0/0"
      ipv6_cidr_block = null
      prefix_list_id  = null
      from_port       = 443
      protocol        = "tcp"
      security_group  = null
      to_port         = 443
      description     = "Allow HTTPS egress traffic to all destinations."
    },
    {
      cidr_block      = null
      ipv6_cidr_block = "::/0"
      prefix_list_id  = null
      from_port       = 443
      protocol        = "tcp"
      security_group  = null
      to_port         = 443
      description     = "Allow HTTPS egress traffic to all destinations."
    }
  ]

  # The protocol must be exactly one of the listed values. The pattern is an anchored
  # alternation: an anchored character class ("^[...]$") matches a single character only
  # and would reject every allowed protocol, including the "tcp" used by the default.
  validation {
    condition = alltrue([
      for rule in var.runner_worker_docker_autoscaler_egress_rules :
      can(regex("^(-1|tcp|udp|icmp|icmpv6)$", rule.protocol))
    ])
    error_message = "Protocol must be '-1', 'tcp', 'udp', 'icmp', or 'icmpv6'."
  }

  # A rule without any target cannot be turned into a security group rule.
  validation {
    condition = alltrue([
      for rule in var.runner_worker_docker_autoscaler_egress_rules :
      (rule.cidr_block != null) ||
      (rule.ipv6_cidr_block != null) ||
      (rule.prefix_list_id != null) ||
      (rule.security_group != null)
    ])
    error_message = "At least one destination (cidr_block, ipv6_cidr_block, prefix_list_id, or security_group) must be specified for each rule."
  }
}

variable "runner_worker_docker_machine_extra_egress_rules" {
description = "List of egress rules for the Runner Workers."
type = list(object({
Expand Down
Loading