From 0764d7d1777f9bb15636c9b313dad308c9b19d57 Mon Sep 17 00:00:00 2001 From: Ryan Cragun Date: Mon, 9 Sep 2024 13:22:41 -0600 Subject: [PATCH] enos: poweroff and terminate instances when shutting them down (#28316) Previously our `shutdown_nodes` modules would halt the machine. While this is useful for simulating a failure it makes cleaning up the halted machines very slow in AWS. Instead, we now poweroff the machines and utilize EC2's instance poweroff handling to immediately terminate the instances. I've tested both scenarios locally utilizing the change and both still work as expected. I also timed before and after and this change saves 5 MINUTES in total runtime (~40%) for the PR replication scenario. I assume it yields similar results for autopilot. Signed-off-by: Ryan Cragun --- enos/modules/shutdown_multiple_nodes/main.tf | 2 +- enos/modules/shutdown_node/main.tf | 2 +- enos/modules/target_ec2_instances/main.tf | 15 +++++++++------ 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/enos/modules/shutdown_multiple_nodes/main.tf b/enos/modules/shutdown_multiple_nodes/main.tf index 2cfe646c25bd..c2781cd8c40a 100644 --- a/enos/modules/shutdown_multiple_nodes/main.tf +++ b/enos/modules/shutdown_multiple_nodes/main.tf @@ -19,7 +19,7 @@ variable "old_hosts" { resource "enos_remote_exec" "shutdown_multiple_nodes" { for_each = var.old_hosts - inline = ["sudo shutdown -H --no-wall; exit 0"] + inline = ["sudo shutdown -P --no-wall; exit 0"] transport = { ssh = { diff --git a/enos/modules/shutdown_node/main.tf b/enos/modules/shutdown_node/main.tf index a077a334f9d0..045857015cdb 100644 --- a/enos/modules/shutdown_node/main.tf +++ b/enos/modules/shutdown_node/main.tf @@ -19,7 +19,7 @@ variable "host" { } resource "enos_remote_exec" "shutdown_node" { - inline = ["sudo shutdown -H --no-wall; exit 0"] + inline = ["sudo shutdown -P --no-wall; exit 0"] transport = { ssh = { diff --git a/enos/modules/target_ec2_instances/main.tf 
b/enos/modules/target_ec2_instances/main.tf index 68a584859b48..75d2bd55edc6 100644 --- a/enos/modules/target_ec2_instances/main.tf +++ b/enos/modules/target_ec2_instances/main.tf @@ -186,12 +186,15 @@ resource "aws_security_group" "target" { resource "aws_instance" "targets" { for_each = local.instances - ami = var.ami_id - iam_instance_profile = aws_iam_instance_profile.target.name - instance_type = local.instance_type - key_name = var.ssh_keypair - subnet_id = data.aws_subnets.vpc.ids[tonumber(each.key) % length(data.aws_subnets.vpc.ids)] - vpc_security_group_ids = [aws_security_group.target.id] + ami = var.ami_id + iam_instance_profile = aws_iam_instance_profile.target.name + // Some scenarios (autopilot, pr_replication) shutdown instances to simulate failure. In those + // cases we should terminate the instance entirely rather than get stuck in stopped limbo. + instance_initiated_shutdown_behavior = "terminate" + instance_type = local.instance_type + key_name = var.ssh_keypair + subnet_id = data.aws_subnets.vpc.ids[tonumber(each.key) % length(data.aws_subnets.vpc.ids)] + vpc_security_group_ids = [aws_security_group.target.id] tags = merge( var.common_tags,