From dc8660191c635b4c5e3e4b49ada5d4202cdfa645 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Tue, 10 Dec 2024 08:04:55 -0800 Subject: [PATCH 1/5] change retry schedule --- app/celery/provider_tasks.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/app/celery/provider_tasks.py b/app/celery/provider_tasks.py index 1e6c7e96f..2cd006927 100644 --- a/app/celery/provider_tasks.py +++ b/app/celery/provider_tasks.py @@ -1,5 +1,6 @@ import json import os +import random from datetime import timedelta from botocore.exceptions import ClientError @@ -29,8 +30,7 @@ @notify_celery.task( bind=True, name="check_sms_delivery_receipt", - max_retries=48, - default_retry_delay=300, + max_retries=72, ) def check_sms_delivery_receipt(self, message_id, notification_id, sent_at): """ @@ -62,7 +62,10 @@ def check_sms_delivery_receipt(self, message_id, notification_id, sent_at): carrier=carrier, provider_response=provider_response, ) - raise self.retry(exc=ntfe) + base_delay = 3600 # one hour + jitter = random.randint(-1200, +1200) # plus or minus 20 minutes + retry_delay = base_delay + jitter + raise self.retry(countdown=retry_delay, exc=ntfe) except ClientError as err: # Probably a ThrottlingException but could be something else error_code = err.response["Error"]["Code"] @@ -77,7 +80,10 @@ def check_sms_delivery_receipt(self, message_id, notification_id, sent_at): carrier=carrier, provider_response=provider_response, ) - raise self.retry(exc=err) + base_delay = 3600 # one hour + jitter = random.randint(-1200, +1200) # plus or minus 20 minutes + retry_delay = base_delay + jitter + raise self.retry(countdown=retry_delay, exc=err) if status == "success": status = NotificationStatus.DELIVERED From 06725af4173a0ec45ea2a9e1219da3f322825785 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Tue, 10 Dec 2024 08:49:07 -0800 Subject: [PATCH 2/5] try to bypass static scan false positive --- app/celery/provider_tasks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/celery/provider_tasks.py b/app/celery/provider_tasks.py index 2cd006927..b88bf741c 100644 --- a/app/celery/provider_tasks.py +++ b/app/celery/provider_tasks.py @@ -63,7 +63,7 @@ def check_sms_delivery_receipt(self, message_id, notification_id, sent_at): provider_response=provider_response, ) base_delay = 3600 # one hour - jitter = random.randint(-1200, +1200) # plus or minus 20 minutes + jitter = random.randint(-1200, +1200) # noqa retry_delay = base_delay + jitter raise self.retry(countdown=retry_delay, exc=ntfe) except ClientError as err: @@ -81,7 +81,7 @@ def check_sms_delivery_receipt(self, message_id, notification_id, sent_at): provider_response=provider_response, ) base_delay = 3600 # one hour - jitter = random.randint(-1200, +1200) # plus or minus 20 minutes + jitter = random.randint(-1200, +1200) # noqa retry_delay = base_delay + jitter raise self.retry(countdown=retry_delay, exc=err) From 50aeb0ab0f52180ed2117faa3460952726ff892a Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Tue, 10 Dec 2024 08:53:38 -0800 Subject: [PATCH 3/5] try to bypass static scan false positive --- app/celery/provider_tasks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/celery/provider_tasks.py b/app/celery/provider_tasks.py index b88bf741c..4bc8b99e1 100644 --- a/app/celery/provider_tasks.py +++ b/app/celery/provider_tasks.py @@ -63,7 +63,7 @@ def check_sms_delivery_receipt(self, message_id, notification_id, sent_at): provider_response=provider_response, ) base_delay = 3600 # one hour - jitter = random.randint(-1200, +1200) # noqa + jitter = random.randint(-1200, +1200) # nosec B311 retry_delay = base_delay + jitter raise self.retry(countdown=retry_delay, exc=ntfe) except ClientError as err: @@ -81,7 +81,7 @@ def check_sms_delivery_receipt(self, message_id, notification_id, sent_at): provider_response=provider_response, ) base_delay = 3600 # one hour - jitter = random.randint(-1200, +1200) # noqa + jitter = random.randint(-1200, +1200) # nosec B311 retry_delay = base_delay + jitter raise self.retry(countdown=retry_delay, exc=err) From fede173a3e05b0f23bda88f6cf3c41b8537b3e71 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Tue, 10 Dec 2024 10:39:26 -0800 Subject: [PATCH 4/5] change worker_max_tasks_per_child to 2000 --- app/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/config.py b/app/config.py index 12159e289..1ec8abd59 100644 --- a/app/config.py +++ b/app/config.py @@ -167,7 +167,7 @@ class Config(object): current_minute = (datetime.now().minute + 1) % 60 CELERY = { - "worker_max_tasks_per_child": 500, + "worker_max_tasks_per_child": 2000, "broker_url": REDIS_URL, "broker_transport_options": { "visibility_timeout": 310, From bdb73e9db29997cf3c14a487fbb8149b39ddb1da Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Tue, 10 Dec 2024 12:26:46 -0800 Subject: [PATCH 5/5] fix time limit in checking delivery receipts --- app/clients/cloudwatch/aws_cloudwatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/clients/cloudwatch/aws_cloudwatch.py b/app/clients/cloudwatch/aws_cloudwatch.py index 36bcf5dca..8ef34abac 100644 --- a/app/clients/cloudwatch/aws_cloudwatch.py +++ b/app/clients/cloudwatch/aws_cloudwatch.py @@ -158,7 +158,7 @@ def check_sms(self, message_id, notification_id, created_at): message["delivery"].get("phoneCarrier", "Unknown Carrier"), ) - if time_now > (created_at + timedelta(hours=3)): + if time_now > (created_at + timedelta(hours=73)): # see app/models.py Notification. This message corresponds to "permanent-failure", # but we are copy/pasting here to avoid circular imports. return "failure", "Unable to find carrier response."