From 8aa19807f1abf2303524e3b901ad0d77df8805cd Mon Sep 17 00:00:00 2001
From: Carlo Costino
Date: Fri, 22 Nov 2024 10:51:14 -0500
Subject: [PATCH 1/3] Add redis ID to ignore vulnerability list in pip-audit

This changeset adds a Python vulnerability ID that we need to ignore because
it was incorrectly applied to the Python Redis module. This is a vulnerability
with an older version of Redis itself, not the Python module.

Signed-off-by: Carlo Costino
---
 .github/workflows/checks.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
index 8324e6053..5244276bd 100644
--- a/.github/workflows/checks.yml
+++ b/.github/workflows/checks.yml
@@ -90,6 +90,8 @@ jobs:
       - uses: pypa/gh-action-pip-audit@v1.0.8
         with:
           inputs: requirements.txt
+          ignore-vulns: |
+            PYSEC-2022-43162

   static-scan:
     runs-on: ubuntu-latest

From 7446aed2d1925c594b6c1c0693a58e3d600955d3 Mon Sep 17 00:00:00 2001
From: Kenneth Kehl <@kkehl@flexion.us>
Date: Mon, 25 Nov 2024 07:29:00 -0800
Subject: [PATCH 2/3] remove excess log lines from s3

---
 app/aws/s3.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/app/aws/s3.py b/app/aws/s3.py
index f83b9059d..e0022f20b 100644
--- a/app/aws/s3.py
+++ b/app/aws/s3.py
@@ -23,7 +23,7 @@


 def set_job_cache(key, value):
-    current_app.logger.info(f"Setting {key} in the job_cache.")
+    current_app.logger.debug(f"Setting {key} in the job_cache.")
     job_cache = current_app.config["job_cache"]
     job_cache[key] = (value, time.time() + 8 * 24 * 60 * 60)

@@ -34,14 +34,14 @@ def get_job_cache(key):
     if ret is None:
         current_app.logger.warning(f"Could not find {key} in the job_cache.")
     else:
-        current_app.logger.info(f"Got {key} from job_cache.")
+        current_app.logger.debug(f"Got {key} from job_cache.")
     return ret


 def len_job_cache():
     job_cache = current_app.config["job_cache"]
     ret = len(job_cache)
-    current_app.logger.info(f"Length of job_cache is {ret}")
+    current_app.logger.debug(f"Length of job_cache is {ret}")
     return ret


@@ -53,7 +53,7 @@ def clean_cache():
         if expiry_time < current_time:
             keys_to_delete.append(key)

-    current_app.logger.info(
+    current_app.logger.debug(
         f"Deleting the following keys from the job_cache: {keys_to_delete}"
     )
     for key in keys_to_delete:
@@ -139,7 +139,7 @@ def cleanup_old_s3_objects():

         try:
             remove_csv_object(obj["Key"])
-            current_app.logger.info(
+            current_app.logger.debug(
                 f"#delete-old-s3-objects Deleted: {obj['LastModified']} {obj['Key']}"
             )
         except botocore.exceptions.ClientError:
@@ -287,7 +287,7 @@ def file_exists(file_location):


 def get_job_location(service_id, job_id):
-    current_app.logger.info(
+    current_app.logger.debug(
         f"#s3-partitioning NEW JOB_LOCATION: {NEW_FILE_LOCATION_STRUCTURE.format(service_id, job_id)}"
     )
     return (
@@ -305,7 +305,7 @@ def get_old_job_location(service_id, job_id):
     but it will take a few days where we have to support both formats.
     Remove this when everything works with the NEW_FILE_LOCATION_STRUCTURE.
     """
-    current_app.logger.info(
+    current_app.logger.debug(
         f"#s3-partitioning OLD JOB LOCATION: {FILE_LOCATION_STRUCTURE.format(service_id, job_id)}"
     )
     return (
@@ -445,7 +445,7 @@ def extract_personalisation(job):
 def get_phone_number_from_s3(service_id, job_id, job_row_number):
     job = get_job_cache(job_id)
     if job is None:
-        current_app.logger.info(f"job {job_id} was not in the cache")
+        current_app.logger.debug(f"job {job_id} was not in the cache")
         job = get_job_from_s3(service_id, job_id)
         # Even if it is None, put it here to avoid KeyErrors
         set_job_cache(job_id, job)
@@ -479,7 +479,7 @@ def get_personalisation_from_s3(service_id, job_id, job_row_number):
     # So this is a little recycling mechanism to reduce the number of downloads.
     job = get_job_cache(job_id)
     if job is None:
-        current_app.logger.info(f"job {job_id} was not in the cache")
+        current_app.logger.debug(f"job {job_id} was not in the cache")
         job = get_job_from_s3(service_id, job_id)
         # Even if it is None, put it here to avoid KeyErrors
         set_job_cache(job_id, job)
@@ -503,7 +503,7 @@ def get_personalisation_from_s3(service_id, job_id, job_row_number):


 def get_job_metadata_from_s3(service_id, job_id):
-    current_app.logger.info(
+    current_app.logger.debug(
         f"#s3-partitioning CALLING GET_JOB_METADATA with {service_id}, {job_id}"
     )
     obj = get_s3_object(*get_job_location(service_id, job_id))

From c8f2f7c0447db649290deb535cd2ba70221f2c53 Mon Sep 17 00:00:00 2001
From: Kenneth Kehl <@kkehl@flexion.us>
Date: Mon, 25 Nov 2024 07:49:04 -0800
Subject: [PATCH 3/3] Change max tasks per celery worker to 500

---
 app/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/config.py b/app/config.py
index 95db014e9..53ea039a0 100644
--- a/app/config.py
+++ b/app/config.py
@@ -166,7 +166,7 @@ class Config(object):
     current_minute = (datetime.now().minute + 1) % 60

     CELERY = {
-        "worker_max_tasks_per_child": 200,
+        "worker_max_tasks_per_child": 500,
         "broker_url": REDIS_URL,
         "broker_transport_options": {
             "visibility_timeout": 310,