From 58f346c98b64deb944227e0e82cb397e316e9c37 Mon Sep 17 00:00:00 2001 From: P4jMepR Date: Sun, 30 Jun 2024 04:44:33 +0200 Subject: [PATCH 1/2] Pathing changes + .gitignore fix As in title: - Hard-coded relative path strings -> paths built with os.path.join() from the current working directory (in run_first.py) - the previous .gitignore pattern (/Data/Processed/*) broke the directory structure, so the two sub-directories are now ignored explicitly (#272) --- .gitignore | 4 +++- run_first.py | 8 ++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index eb392320..2b68f2fe 100644 --- a/.gitignore +++ b/.gitignore @@ -143,5 +143,7 @@ scripts/similarity/config.yml *.local.yml # Processed or local files -/Data/Processed/* +/Data/Processed/JobDescription +/Data/Processed/Resumes + *.local.pdf diff --git a/run_first.py b/run_first.py index bebfd19c..62d39fe0 100644 --- a/run_first.py +++ b/run_first.py @@ -1,14 +1,16 @@ import json import logging import os +import traceback + from scripts import JobDescriptionProcessor, ResumeProcessor from scripts.utils import get_filenames_from_dir, init_logging_config init_logging_config() -PROCESSED_RESUMES_PATH = "Data/Processed/Resumes" -PROCESSED_JOB_DESCRIPTIONS_PATH = "Data/Processed/JobDescription" +PROCESSED_RESUMES_PATH = os.path.join(os.getcwd(), "Data", "Processed", "Resumes") +PROCESSED_JOB_DESCRIPTIONS_PATH = os.path.join(os.getcwd(), "Data", "Processed", "JobDescription") def read_json(filename): @@ -44,6 +46,7 @@ def remove_old_files(files_path): logging.error("There are no resumes present in the specified folder.") logging.error("Exiting from the program.") logging.error("Please add resumes in the Data/Resumes folder and try again.") + logging.error(str(traceback.format_exc())) exit(1) # Now after getting the file_names parse the resumes into a JSON Format. 
@@ -66,6 +69,7 @@ def remove_old_files(files_path): logging.error("There are no job-description present in the specified folder.") logging.error("Exiting from the program.") logging.error("Please add resumes in the Data/JobDescription folder and try again.") + logging.error(str(traceback.format_exc())) exit(1) # Now after getting the file_names parse the resumes into a JSON Format. From c912d21a08f978bcb29392206802f01905b34f8b Mon Sep 17 00:00:00 2001 From: P4jMepR Date: Sun, 30 Jun 2024 05:13:16 +0200 Subject: [PATCH 2/2] Workaround of git rules Git does not track empty directories, so Data/Processed can be missing after a fresh clone. run_first.py now creates Data/Processed/Resumes and Data/Processed/JobDescription whenever either one is missing. (solved #272) --- run_first.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/run_first.py b/run_first.py index 62d39fe0..7ea25249 100644 --- a/run_first.py +++ b/run_first.py @@ -9,6 +9,14 @@ init_logging_config() +processed_Path = os.path.join(os.getcwd(), "Data", "Processed") +if not os.path.exists(processed_Path): + logging.info('"/Processed/" directory structure is missing, setting up a new one.\n') +# Create both output folders; exist_ok=True also repairs a partially created tree. +os.makedirs(os.path.join(processed_Path, "Resumes"), exist_ok=True) +os.makedirs(os.path.join(processed_Path, "JobDescription"), exist_ok=True) + + PROCESSED_RESUMES_PATH = os.path.join(os.getcwd(), "Data", "Processed", "Resumes") PROCESSED_JOB_DESCRIPTIONS_PATH = os.path.join(os.getcwd(), "Data", "Processed", "JobDescription")