From 8aa19807f1abf2303524e3b901ad0d77df8805cd Mon Sep 17 00:00:00 2001
From: Carlo Costino
Date: Fri, 22 Nov 2024 10:51:14 -0500
Subject: [PATCH 1/3] Add redis ID to ignore vulnerability list in pip-audit

This changeset adds a Python vulnerability ID that we need to ignore because
it was incorrectly applied to the Python Redis module. This is a vulnerability
with an older version of Redis itself, not the Python module.

Signed-off-by: Carlo Costino
---
 .github/workflows/checks.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
index 8324e6053..5244276bd 100644
--- a/.github/workflows/checks.yml
+++ b/.github/workflows/checks.yml
@@ -90,6 +90,8 @@ jobs:
       - uses: pypa/gh-action-pip-audit@v1.0.8
         with:
           inputs: requirements.txt
+          ignore-vulns: |
+            PYSEC-2022-43162

   static-scan:
     runs-on: ubuntu-latest

From 7446aed2d1925c594b6c1c0693a58e3d600955d3 Mon Sep 17 00:00:00 2001
From: Kenneth Kehl <@kkehl@flexion.us>
Date: Mon, 25 Nov 2024 07:29:00 -0800
Subject: [PATCH 2/3] remove excess log lines from s3

---
 app/aws/s3.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/app/aws/s3.py b/app/aws/s3.py
index f83b9059d..e0022f20b 100644
--- a/app/aws/s3.py
+++ b/app/aws/s3.py
@@ -23,7 +23,7 @@


 def set_job_cache(key, value):
-    current_app.logger.info(f"Setting {key} in the job_cache.")
+    current_app.logger.debug(f"Setting {key} in the job_cache.")
     job_cache = current_app.config["job_cache"]
     job_cache[key] = (value, time.time() + 8 * 24 * 60 * 60)

@@ -34,14 +34,14 @@ def get_job_cache(key):
     if ret is None:
         current_app.logger.warning(f"Could not find {key} in the job_cache.")
     else:
-        current_app.logger.info(f"Got {key} from job_cache.")
+        current_app.logger.debug(f"Got {key} from job_cache.")
     return ret


 def len_job_cache():
     job_cache = current_app.config["job_cache"]
     ret = len(job_cache)
-    current_app.logger.info(f"Length of job_cache is {ret}")
+    current_app.logger.debug(f"Length of job_cache is {ret}")
     return ret


@@ -53,7 +53,7 @@ def clean_cache():
         if expiry_time < current_time:
             keys_to_delete.append(key)

-    current_app.logger.info(
+    current_app.logger.debug(
         f"Deleting the following keys from the job_cache: {keys_to_delete}"
     )
     for key in keys_to_delete:
@@ -139,7 +139,7 @@ def cleanup_old_s3_objects():

         try:
             remove_csv_object(obj["Key"])
-            current_app.logger.info(
+            current_app.logger.debug(
                 f"#delete-old-s3-objects Deleted: {obj['LastModified']} {obj['Key']}"
             )
         except botocore.exceptions.ClientError:
@@ -287,7 +287,7 @@ def file_exists(file_location):


 def get_job_location(service_id, job_id):
-    current_app.logger.info(
+    current_app.logger.debug(
         f"#s3-partitioning NEW JOB_LOCATION: {NEW_FILE_LOCATION_STRUCTURE.format(service_id, job_id)}"
     )
     return (
@@ -305,7 +305,7 @@ def get_old_job_location(service_id, job_id):
     but it will take a few days where we have to support both formats.
     Remove this when everything works with the NEW_FILE_LOCATION_STRUCTURE.
     """
-    current_app.logger.info(
+    current_app.logger.debug(
         f"#s3-partitioning OLD JOB LOCATION: {FILE_LOCATION_STRUCTURE.format(service_id, job_id)}"
     )
     return (
@@ -445,7 +445,7 @@ def extract_personalisation(job):
 def get_phone_number_from_s3(service_id, job_id, job_row_number):
     job = get_job_cache(job_id)
     if job is None:
-        current_app.logger.info(f"job {job_id} was not in the cache")
+        current_app.logger.debug(f"job {job_id} was not in the cache")
         job = get_job_from_s3(service_id, job_id)
         # Even if it is None, put it here to avoid KeyErrors
         set_job_cache(job_id, job)
@@ -479,7 +479,7 @@ def get_personalisation_from_s3(service_id, job_id, job_row_number):
     # So this is a little recycling mechanism to reduce the number of downloads.
     job = get_job_cache(job_id)
     if job is None:
-        current_app.logger.info(f"job {job_id} was not in the cache")
+        current_app.logger.debug(f"job {job_id} was not in the cache")
         job = get_job_from_s3(service_id, job_id)
         # Even if it is None, put it here to avoid KeyErrors
         set_job_cache(job_id, job)
@@ -503,7 +503,7 @@ def get_personalisation_from_s3(service_id, job_id, job_row_number):


 def get_job_metadata_from_s3(service_id, job_id):
-    current_app.logger.info(
+    current_app.logger.debug(
         f"#s3-partitioning CALLING GET_JOB_METADATA with {service_id}, {job_id}"
     )
     obj = get_s3_object(*get_job_location(service_id, job_id))

From c8f2f7c0447db649290deb535cd2ba70221f2c53 Mon Sep 17 00:00:00 2001
From: Kenneth Kehl <@kkehl@flexion.us>
Date: Mon, 25 Nov 2024 07:49:04 -0800
Subject: [PATCH 3/3] Change max tasks per celery worker to 500

---
 app/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/config.py b/app/config.py
index 95db014e9..53ea039a0 100644
--- a/app/config.py
+++ b/app/config.py
@@ -166,7 +166,7 @@ class Config(object):
     current_minute = (datetime.now().minute + 1) % 60

     CELERY = {
-        "worker_max_tasks_per_child": 200,
+        "worker_max_tasks_per_child": 500,
         "broker_url": REDIS_URL,
         "broker_transport_options": {
             "visibility_timeout": 310,