From 772cb99a35bf4a00647c1a39678d294ff5388aeb Mon Sep 17 00:00:00 2001 From: han-so1omon Date: Tue, 11 Jun 2024 11:42:41 -0500 Subject: [PATCH] Fix issue 269, deleting pre-existing files error Description: The error arises when the user deletes the pre-existing files from Data/Resumes or Data/JobDescription. Issue: The os.listdir call in run_first.py raises FileNotFoundError on the Data/Processed/ directories when they do not exist. Solution: - In run_first.py, handle a missing directory by logging a message and returning instead of raising - In scripts/, create the save directory if it does not exist before writing the JSON file --- .gitignore | 3 +++ resume_matcher/main.py | 7 +++---- run_first.py | 7 ++++++- scripts/JobDescriptionProcessor.py | 2 ++ scripts/ResumeProcessor.py | 2 ++ scripts/utils/ReadFiles.py | 4 ++++ scripts/utils/__init__.py | 2 +- 7 files changed, 21 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index eb392320..17e12b2b 100644 --- a/.gitignore +++ b/.gitignore @@ -145,3 +145,6 @@ scripts/similarity/config.yml # Processed or local files /Data/Processed/* *.local.pdf + +# ASDF version manager +.tool-versions diff --git a/resume_matcher/main.py b/resume_matcher/main.py index 7f19e4b4..53fe4c18 100644 --- a/resume_matcher/main.py +++ b/resume_matcher/main.py @@ -18,7 +18,9 @@ def get_filenames_from_dir(directory): - return [f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))] + return [ + f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f)) + ] def process_files(resume, job_description): @@ -34,6 +36,3 @@ def process_files(resume, job_description): print(r.score) print(f"Processing resume: {resume}") print(f"Processing job description: {job_description}") - - - diff --git a/run_first.py b/run_first.py index bebfd19c..f59a9f1b 100644 --- a/run_first.py +++ b/run_first.py @@ -18,8 +18,13 @@ def read_json(filename): def remove_old_files(files_path): + try: + filenames = os.listdir(files_path) + except FileNotFoundError: + logging.info(f"Directory {files_path} does not 
exist. No files to remove") + return - for filename in os.listdir(files_path): + for filename in filenames: try: file_path = os.path.join(files_path, filename) diff --git a/scripts/JobDescriptionProcessor.py b/scripts/JobDescriptionProcessor.py index 6faf9811..e3e62cf4 100644 --- a/scripts/JobDescriptionProcessor.py +++ b/scripts/JobDescriptionProcessor.py @@ -4,6 +4,7 @@ from .parsers import ParseJobDesc, ParseResume from .ReadPdf import read_single_pdf +from .utils import make_sure_path_exists READ_JOB_DESCRIPTION_FROM = "Data/JobDescription/" SAVE_DIRECTORY = "Data/Processed/JobDescription" @@ -41,6 +42,7 @@ def _write_json_file(self, resume_dictionary: dict): + ".json" ) save_directory_name = pathlib.Path(SAVE_DIRECTORY) / file_name + make_sure_path_exists(SAVE_DIRECTORY) json_object = json.dumps(resume_dictionary, sort_keys=True, indent=14) with open(save_directory_name, "w+") as outfile: outfile.write(json_object) diff --git a/scripts/ResumeProcessor.py b/scripts/ResumeProcessor.py index c31f8f9b..99adad33 100644 --- a/scripts/ResumeProcessor.py +++ b/scripts/ResumeProcessor.py @@ -4,6 +4,7 @@ from .parsers import ParseJobDesc, ParseResume from .ReadPdf import read_single_pdf +from .utils import make_sure_path_exists READ_RESUME_FROM = "Data/Resumes/" SAVE_DIRECTORY = "Data/Processed/Resumes" @@ -38,6 +39,7 @@ def _write_json_file(self, resume_dictionary: dict): "Resume-" + self.input_file + resume_dictionary["unique_id"] + ".json" ) save_directory_name = pathlib.Path(SAVE_DIRECTORY) / file_name + make_sure_path_exists(SAVE_DIRECTORY) json_object = json.dumps(resume_dictionary, sort_keys=True, indent=14) with open(save_directory_name, "w+") as outfile: outfile.write(json_object) diff --git a/scripts/utils/ReadFiles.py b/scripts/utils/ReadFiles.py index d606e862..8910ae15 100644 --- a/scripts/utils/ReadFiles.py +++ b/scripts/utils/ReadFiles.py @@ -8,3 +8,7 @@ def get_filenames_from_dir(directory_path: str) -> list: if 
os.path.isfile(os.path.join(directory_path, f)) and f != ".DS_Store" ] return filenames + + +def make_sure_path_exists(directory_path: str): + os.makedirs(directory_path, exist_ok=True) diff --git a/scripts/utils/__init__.py b/scripts/utils/__init__.py index 0f690616..94d37945 100644 --- a/scripts/utils/__init__.py +++ b/scripts/utils/__init__.py @@ -1,3 +1,3 @@ from .logger import init_logging_config -from .ReadFiles import get_filenames_from_dir +from .ReadFiles import get_filenames_from_dir, make_sure_path_exists from .Utils import TextCleaner