Skip to content

Commit

Permalink
Prevent exiting in case there are placement errors till timeout is re…
Browse files Browse the repository at this point in the history
…ached (GetSimpl#33)

* Prevent exiting in case there are placement errors till timeout is reached

* Fix message for assertion

* Raise exception if deployment fails

* Simplify logic to determine failed deployments

* Fix tests failing due to ordering of resources

* Remove unnecessary print statement

* Fix pending fargate tests

* Fix failing tests
  • Loading branch information
aswinkarthik authored Aug 20, 2020
1 parent 0468e2f commit edfd5a3
Show file tree
Hide file tree
Showing 8 changed files with 263 additions and 96 deletions.
38 changes: 19 additions & 19 deletions cloudlift/deployment/deployer.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,14 @@ def deploy_new_version(client, cluster_name, ecs_service_name,
task_definition.apply_container_environment(container, env_config)
print_task_diff(ecs_service_name, task_definition.diff, color)
new_task_definition = deployment.update_task_definition(task_definition)
response = deploy_and_wait(deployment, new_task_definition, color, timeout_seconds)
if response:
log_bold(ecs_service_name + " Deployed successfully.")
else:
log_err(ecs_service_name + " Deployment failed.")
return response

deployment_succeeded = deploy_and_wait(deployment, new_task_definition, color, timeout_seconds)

if not deployment_succeeded:
record_deployment_failure_metric(deployment.cluster_name, deployment.service_name)
raise UnrecoverableException(ecs_service_name + " Deployment failed.")

log_bold(ecs_service_name + " Deployed successfully.")


def deploy_and_wait(deployment, new_task_definition, color, timeout_seconds):
Expand Down Expand Up @@ -95,24 +97,22 @@ def make_container_defn_env_conf(service_config, environment_config):


def wait_for_finish(action, existing_events, color, deploy_end_time):
waiting = True
while waiting:
sleep(1)
while time() <= deploy_end_time:
service = action.get_service()
existing_events = fetch_and_print_new_events(
service,
existing_events,
color
)
waiting = not is_deployed(service['deployments']) and not service.errors
if time() > deploy_end_time:
log_err("Deploy timed out!")
record_deployment_failure_metric(action.cluster_name, action.service_name)
return False
if service.errors:
log_err(str(service.errors))
return False
return True


if is_deployed(service['deployments']):
return True

sleep(1)

log_err("Deployment timed out!")
return False


def record_deployment_failure_metric(cluster_name, service_name):
Expand All @@ -121,7 +121,7 @@ def record_deployment_failure_metric(cluster_name, service_name):
Namespace='ECS/DeploymentMetrics',
MetricData=[
{
"MetricName": 'TimedOutCloudliftDeployments',
"MetricName": 'FailedCloudliftDeployments',
"Value": 1,
"Timestamp": datetime.utcnow(),
"Dimensions": [
Expand Down
4 changes: 2 additions & 2 deletions cloudlift/deployment/service_template_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def _add_service_alarms(self, svc):
)
self.template.add_resource(ecs_high_memory_alarm)
cloudlift_timedout_deployments_alarm = Alarm(
'TimedOutCloudliftDeployments' + str(svc.name),
'FailedCloudliftDeployments' + str(svc.name),
EvaluationPeriods=1,
Dimensions=[
MetricDimension(
Expand All @@ -135,7 +135,7 @@ def _add_service_alarms(self, svc):
ComparisonOperator='GreaterThanThreshold',
Statistic='Average',
Threshold='0',
MetricName='TimedOutCloudliftDeployments',
MetricName='FailedCloudliftDeployments',
TreatMissingData='notBreaching'
)
self.template.add_resource(cloudlift_timedout_deployments_alarm)
Expand Down
2 changes: 1 addition & 1 deletion cloudlift/version/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
VERSION = '1.8.1'
VERSION = '1.8.2'
170 changes: 166 additions & 4 deletions test/deployment/deployer_test.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from _datetime import datetime
from _datetime import datetime, timedelta
from dateutil.tz.tz import tzlocal
from unittest import TestCase
from unittest.mock import patch, MagicMock, sentinel

from cloudlift.deployment.deployer import is_deployed, record_deployment_failure_metric
from cloudlift.deployment.deployer import is_deployed, \
record_deployment_failure_metric, deploy_and_wait
from cloudlift.deployment.ecs import EcsService, EcsTaskDefinition


class TestDeployer(TestCase):
Expand Down Expand Up @@ -36,6 +38,166 @@ def test_is_deployed_considering_only_primary_deployment(self):
assert not is_deployed(deployments)


class TestDeployAndWait(TestCase):
@staticmethod
def create_ecs_service_with_status(status):
return EcsService('cluster-testing', status)

def test_deploy_and_wait_successful_run(self):
deployment = MagicMock()
deployment.get_service.side_effect = [
self.create_ecs_service_with_status({
'events': [
{'message': 'event1', 'createdAt': datetime.now()}
],
'deployments': [
{'status': 'PRIMARY', 'desiredCount': 5, 'runningCount': 0}
]
}),
self.create_ecs_service_with_status({
'events': [
{'message': 'event2', 'createdAt': datetime.now()}
],
'deployments': [
{'status': 'PRIMARY', 'desiredCount': 5, 'runningCount': 5}
]
})
]

new_task_definition = EcsTaskDefinition({'containerDefinitions': []})
color = "green"
timeout_seconds = 3

self.assertTrue(
deploy_and_wait(deployment, new_task_definition, color, timeout_seconds),
"expected deployment to be successful"
)

deployment.deploy.assert_called_with(new_task_definition)

@patch("cloudlift.deployment.deployer.log_err")
def test_deploy_and_wait_timeout(self, mock_log_err):
deployment = MagicMock()
deployment.get_service.return_value = self.create_ecs_service_with_status({
'events': [
{'message': 'event1', 'createdAt': datetime.now()}
],
'deployments': [
{'status': 'PRIMARY', 'desiredCount': 5, 'runningCount': 0}
]
})

new_task_definition = EcsTaskDefinition({'containerDefinitions': []})
color = "green"
timeout_seconds = 2

self.assertFalse(
deploy_and_wait(deployment, new_task_definition, color, timeout_seconds),
"expected deployment to fail"
)

deployment.deploy.assert_called_with(new_task_definition)
mock_log_err.assert_called_with('Deployment timed out!')

@patch("cloudlift.deployment.deployer.log_err")
def test_deploy_and_wait_unable_to_place_tasks_initially_succeeds_eventually(self, mock_log_err):
deployment = MagicMock()
start_time = datetime.now(tz=tzlocal())
deployment.get_service.side_effect = [
self.create_ecs_service_with_status({
'events': [
{'message': 'unable to place tasks due to memory', 'createdAt': start_time},
],
'deployments': [
{'status': 'PRIMARY', 'desiredCount': 5, 'runningCount': 0,
'createdAt': start_time - timedelta(seconds=5),
'updatedAt': start_time - timedelta(seconds=5)}
]
}),
self.create_ecs_service_with_status({
'events': [
{'message': 'unable to place tasks due to memory', 'createdAt': start_time + timedelta(seconds=1)},
],
'deployments': [
{'status': 'PRIMARY', 'desiredCount': 5, 'runningCount': 0,
'createdAt': start_time - timedelta(seconds=5),
'updatedAt': start_time - timedelta(seconds=2)}
]
}),
self.create_ecs_service_with_status({
'events': [
{'message': 'started tasks', 'createdAt': start_time + timedelta(seconds=2)},
],
'deployments': [
{'status': 'PRIMARY', 'desiredCount': 5, 'runningCount': 5,
'createdAt': start_time - timedelta(seconds=5),
'updatedAt': start_time - timedelta(seconds=1)}
]
}),
]

new_task_definition = EcsTaskDefinition({'containerDefinitions': []})
color = "green"
timeout_seconds = 4

self.assertTrue(
deploy_and_wait(deployment, new_task_definition, color, timeout_seconds),
"expected deployment to pass"
)

deployment.deploy.assert_called_with(new_task_definition)
mock_log_err.assert_not_called()

@patch("cloudlift.deployment.deployer.log_err")
def test_deploy_and_wait_unable_to_place_tasks_till_timeout(self, mock_log_err):
deployment = MagicMock()
start_time = datetime.now(tz=tzlocal())
deployment.get_service.side_effect = [
self.create_ecs_service_with_status({
'events': [
{'message': 'unable to place tasks due to memory', 'createdAt': start_time},
],
'deployments': [
{'status': 'PRIMARY', 'desiredCount': 5, 'runningCount': 0,
'createdAt': start_time - timedelta(seconds=5),
'updatedAt': start_time - timedelta(seconds=5)}
]
}),
self.create_ecs_service_with_status({
'events': [
{'message': 'unable to place tasks due to memory', 'createdAt': start_time + timedelta(seconds=1)},
],
'deployments': [
{'status': 'PRIMARY', 'desiredCount': 5, 'runningCount': 0,
'createdAt': start_time - timedelta(seconds=5),
'updatedAt': start_time - timedelta(seconds=4)}
]
}),
self.create_ecs_service_with_status({
'events': [
{'message': 'unable to place tasks due to memory', 'createdAt': start_time + timedelta(seconds=2)},
],
'deployments': [
{'status': 'PRIMARY', 'desiredCount': 5, 'runningCount': 0,
'createdAt': start_time - timedelta(seconds=5),
'updatedAt': start_time - timedelta(seconds=3)}
]
}),
]

new_task_definition = EcsTaskDefinition({'containerDefinitions': []})
color = "green"
timeout_seconds = 2

self.assertFalse(
deploy_and_wait(deployment, new_task_definition, color, timeout_seconds),
"expected deployment to fail"
)

deployment.deploy.assert_called_with(new_task_definition)
mock_log_err.assert_called_with('Deployment timed out!')


@patch('cloudlift.deployment.deployer.datetime')
@patch('cloudlift.deployment.deployer.boto3.client')
def test_create_deployment_timeout_alarm(mock_boto3_client, dt):
Expand All @@ -50,7 +212,7 @@ def test_create_deployment_timeout_alarm(mock_boto3_client, dt):
Namespace='ECS/DeploymentMetrics',
MetricData=[
{
"MetricName": 'TimedOutCloudliftDeployments',
"MetricName": 'FailedCloudliftDeployments',
"Value": 1,
"Timestamp": now,
"Dimensions": [
Expand Down
5 changes: 2 additions & 3 deletions test/deployment/service_template_generator_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,10 @@

from cloudlift.config import ServiceConfiguration
from cloudlift.deployment.service_template_generator import ServiceTemplateGenerator
from cloudlift.version import VERSION

def mocked_service_config():
return {
"cloudlift_version": VERSION,
"cloudlift_version": 'test-version',
"notifications_arn": "some",
"services": {
"Dummy": {
Expand Down Expand Up @@ -41,7 +40,7 @@ def mocked_service_config():

def mocked_fargate_service_config():
return {
"cloudlift_version": VERSION,
"cloudlift_version": 'test-version',
"notifications_arn": "some",
"services": {
"DummyFargateRunSidekiqsh": {
Expand Down
13 changes: 8 additions & 5 deletions test/deployment/service_updater_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,16 @@ def test_build_command_without_build_args(self):
assert 'docker build -t test:v1 .' == su._build_command("test:v1")

def test_build_command_with_build_args(self):
su = ServiceUpdater("dummy", "test", None, None, {"SSH_KEY": "\"`cat ~/.ssh/id_rsa`\"", "A": "1"})
assert 'docker build -t test:v1 --build-arg SSH_KEY="`cat ~/.ssh/id_rsa`" --build-arg A=1 .' == su._build_command("test:v1")
su = ServiceUpdater("dummy", "test", None, None, build_args={"SSH_KEY": "\"`cat ~/.ssh/id_rsa`\"", "A": "1"})
assert su._build_command("test:v1") == 'docker build -t test:v1 --build-arg SSH_KEY="`cat ~/.ssh/id_rsa`"' \
' --build-arg A=1 .'

def test_use_dockerfile_with_build_args(self):
su = ServiceUpdater("dummy", "test", None, None, {"SSH_KEY": "\"`cat ~/.ssh/id_rsa`\"", "A": "1"}, 'CustomDockerfile')
assert 'docker build -f CustomDockerfile -t test:v1 --build-arg SSH_KEY="`cat ~/.ssh/id_rsa`" --build-arg A=1 .' == su._build_command("test:v1")
su = ServiceUpdater("dummy", "test", None, None, build_args={"SSH_KEY": "\"`cat ~/.ssh/id_rsa`\"", "A": "1"},
dockerfile='CustomDockerfile')
assert su._build_command("test:v1") == 'docker build -f CustomDockerfile -t test:v1 ' \
'--build-arg SSH_KEY="`cat ~/.ssh/id_rsa`" --build-arg A=1 .'

def test_build_command_with_dockerfile_without_build_args(self):
su = ServiceUpdater("dummy", "test", None, None, None, 'CustomDockerfile')
su = ServiceUpdater("dummy", "test", None, None, None, dockerfile='CustomDockerfile')
assert 'docker build -f CustomDockerfile -t test:v1 .' == su._build_command("test:v1")
Loading

0 comments on commit edfd5a3

Please sign in to comment.