From 0764d7d1777f9bb15636c9b313dad308c9b19d57 Mon Sep 17 00:00:00 2001
From: Ryan Cragun <me@ryan.ec>
Date: Mon, 9 Sep 2024 13:22:41 -0600
Subject: [PATCH] enos: poweroff and terminate instances when shutting them
 down (#28316)

Previously our `shutdown_node` and `shutdown_multiple_nodes` modules
would halt the machine. While this is useful for simulating a failure,
it makes cleaning up the halted machines very slow in AWS.

Instead, we now power off the machines and rely on EC2's
instance-initiated shutdown behavior to immediately terminate the
instances.

I've tested both scenarios locally utilizing the change and both still
work as expected. I also timed before and after and this change saves 5
MINUTES in total runtime (~40%) for the PR replication scenario. I assume
it yields similar results for autopilot.

Signed-off-by: Ryan Cragun <me@ryan.ec>
---
 enos/modules/shutdown_multiple_nodes/main.tf |  2 +-
 enos/modules/shutdown_node/main.tf           |  2 +-
 enos/modules/target_ec2_instances/main.tf    | 15 +++++++++------
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/enos/modules/shutdown_multiple_nodes/main.tf b/enos/modules/shutdown_multiple_nodes/main.tf
index 2cfe646c25bd..c2781cd8c40a 100644
--- a/enos/modules/shutdown_multiple_nodes/main.tf
+++ b/enos/modules/shutdown_multiple_nodes/main.tf
@@ -19,7 +19,7 @@ variable "old_hosts" {
 
 resource "enos_remote_exec" "shutdown_multiple_nodes" {
   for_each = var.old_hosts
-  inline   = ["sudo shutdown -H --no-wall; exit 0"]
+  inline   = ["sudo shutdown -P --no-wall; exit 0"]
 
   transport = {
     ssh = {
diff --git a/enos/modules/shutdown_node/main.tf b/enos/modules/shutdown_node/main.tf
index a077a334f9d0..045857015cdb 100644
--- a/enos/modules/shutdown_node/main.tf
+++ b/enos/modules/shutdown_node/main.tf
@@ -19,7 +19,7 @@ variable "host" {
 }
 
 resource "enos_remote_exec" "shutdown_node" {
-  inline = ["sudo shutdown -H --no-wall; exit 0"]
+  inline = ["sudo shutdown -P --no-wall; exit 0"]
 
   transport = {
     ssh = {
diff --git a/enos/modules/target_ec2_instances/main.tf b/enos/modules/target_ec2_instances/main.tf
index 68a584859b48..75d2bd55edc6 100644
--- a/enos/modules/target_ec2_instances/main.tf
+++ b/enos/modules/target_ec2_instances/main.tf
@@ -186,12 +186,15 @@ resource "aws_security_group" "target" {
 resource "aws_instance" "targets" {
   for_each = local.instances
 
-  ami                    = var.ami_id
-  iam_instance_profile   = aws_iam_instance_profile.target.name
-  instance_type          = local.instance_type
-  key_name               = var.ssh_keypair
-  subnet_id              = data.aws_subnets.vpc.ids[tonumber(each.key) % length(data.aws_subnets.vpc.ids)]
-  vpc_security_group_ids = [aws_security_group.target.id]
+  ami                  = var.ami_id
+  iam_instance_profile = aws_iam_instance_profile.target.name
+  // Some scenarios (autopilot, pr_replication) shut down instances to simulate failure. In those
+  // cases we should terminate the instance entirely rather than get stuck in stopped limbo.
+  instance_initiated_shutdown_behavior = "terminate"
+  instance_type                        = local.instance_type
+  key_name                             = var.ssh_keypair
+  subnet_id                            = data.aws_subnets.vpc.ids[tonumber(each.key) % length(data.aws_subnets.vpc.ids)]
+  vpc_security_group_ids               = [aws_security_group.target.id]
 
   tags = merge(
     var.common_tags,