diff --git a/.dockerignore b/.dockerignore index e935998..d94de03 100644 --- a/.dockerignore +++ b/.dockerignore @@ -21,4 +21,5 @@ coverage.xml .hypothesis .gitignore h -*.egg-info \ No newline at end of file +*.egg-info +.testEnv \ No newline at end of file diff --git a/.gitignore b/.gitignore index c4c1891..d3c99cc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ __pycache__/ .venv -*.egg-info \ No newline at end of file +*.egg-info +.testEnv \ No newline at end of file diff --git a/Dockerfile_test_image b/Dockerfile_test_image new file mode 100644 index 0000000..f5ce49d --- /dev/null +++ b/Dockerfile_test_image @@ -0,0 +1,19 @@ +FROM python:3-alpine3.18 + +ENV DIR=/project +ENV TESSDATA_PREFIX=/usr/share/tessdata + +# Create the /watched directory tree in a single layer. +RUN mkdir -p /watched/text_extraction /watched/spell_checking /watched/output /watched/file_loader + +# One layer for all system packages; --no-cache keeps the apk index out of the image. +RUN apk add --no-cache libmagic poppler-utils tesseract-ocr tesseract-ocr-data-dan tesseract-ocr-data-eng + +WORKDIR ${DIR} + +# Install dependencies before copying the sources so this layer stays cached until requirements.txt changes. +COPY requirements.txt ./ +RUN pip install --no-cache-dir -r requirements.txt + +COPY . . +RUN pip install --no-cache-dir --editable . diff --git a/docker-compose-test-image.yml b/docker-compose-test-image.yml new file mode 100644 index 0000000..17a1b1d --- /dev/null +++ b/docker-compose-test-image.yml @@ -0,0 +1,14 @@ +version: '3.7' + +services: + test_container: + build: + context: . 
+ dockerfile: Dockerfile_test_image + + container_name: Project_tester + + command: python -m unittest discover -s src -p 'test_*.py' + + volumes: + - ./.testEnv:/project diff --git a/h b/h deleted file mode 100644 index 98102c2..0000000 --- a/h +++ /dev/null @@ -1,13 +0,0 @@ -""" Helper tool for improved programming experience """ - -import os -import sys - -if sys.argv[1].lower() == 'lint': - os.system('pylint ./**/*.py') - -elif sys.argv[1].lower() == 'test': - os.system("python -m unittest discover -s src -p 'test_*.py'") - -else: - print(f"Invalid command: {sys.argv[1]} - valid commands are: test & lint") diff --git a/readme.md b/readme.md index 78a4eae..0644016 100644 --- a/readme.md +++ b/readme.md @@ -1,40 +1,115 @@ -# Activating and setting up virtualenv +# GET STARTED +## Requirements +* **WSL** +* **Python 3.11+** +* **Docker** + +## Build the project +1) `Enter WSL (Windows ONLY)` + * Open terminal -> type `WSL` -> Hit Enter -> type `cd` -> Hit Enter +2) `git clone git@github.com:Knox-AAU/Preprocessessing_Text-extraction.git` +3) `cd Preprocessessing_Text-extraction` +4) `source run setup` + +## Easy shell commands (Custom script) + +***How to start dev environment*** ```bash -python -m venv .venv && source .venv/bin/activate && pip install -r requirements.txt +sh run dev up ``` -# Helper functions (h file): -To lint: +***How to stop dev environment*** ```bash -python h lint +sh run dev down ``` -To test: + +***How to lint project*** +```bash +sh run lint +``` + +***How to run project tests*** +```bash +sh run test +``` + +***How to run prod environment (SERVER ONLY)*** ```bash -python h test +sh run prod up +``` + +***How to stop prod environment (SERVER ONLY)*** +```bash +sh run prod down +``` + +## How to contribute +To be able to contribute to this project you will need to fulfill the following requirements: +* **Branching** + * *To begin your contribution you have to branch out directly from main. Remember to pull the newest version before branching out. 
When you're done with the branch, you create a pull request and get it approved by another person working on the project.* + * To make a new branch directly from the terminal, you can use the following commands: + * ``git pull`` + * ``git checkout -b {branchName}`` (e.g. **git checkout -b jc/new-branch-name**) + * ``git add {files}`` + * ``git commit -m {comment about changes}`` + * ``git push origin {branchName}`` (e.g. **git push origin jc/new-branch-name**) +* **Pull requests** + * At least one person is required to review the changes + * When a pull request is created, the workflow starts running - checking the code structure with a linter and checking that unit tests and other tests pass + * If the workflow fails, merging is blocked until it is fixed +* **Workflow** + * The workflow consists of 3 steps, where the last step is divided into 3 parts + * Linter - Ensures good structure and readable code + * Unittest - Built-in testing module, ensuring integrity and validation of modules + * Deployment - Creates production packages that are pulled on the server. Deployment creates three packages, one for each step in text-extraction. To run deployment, the production branch (main) needs to be tagged before the workflow constructs the packages. + +## Deployment +* **How to tag production** + * Tagging a new production version can be done from the terminal + * ``git tag {version} {branchName}`` (e.g. 
**git tag 1.2 main**) +* **How to deploy a new version** + * After tagging the next production package, it can be pulled on the server + * Connect to the AAU VPN + * SSH into preproc01: `ssh {username}@knox-preproc01.srv.aau.dk` + * Two options: + * Clone the project with git and use `sh run prod up` + * ``sudo docker compose -f docker-compose-prod.yml pull`` && ``docker compose -f docker-compose-prod.yml up`` + * Watchtower will pull new versions in the future + +--- +--- +--- +--- +--- + +# Advanced/Detailed commands for the project +## Activating and setting up virtualenv +```bash +python -m venv .venv && source .venv/bin/activate && pip install -r requirements.txt ``` -# To lint code +## To lint code 1) Be in folder with files you want to lint (usually root) 2) ```bash pylint ./**/*.py ``` -# To run tests +## To run tests 1) Be in root folder 2) ```bash python -m unittest discover -s src -p 'test_*.py' ``` -# Command to setup setuptools and fix imports etc +## Command to set up setuptools and fix imports etc ```bash python -m pip install --editable . 
``` -# Docker compose commands +## Docker compose commands *Sudo rights may be needed - use: " **sudo {command you want to run}** "* -### Build containers +#### Build containers * **To build developer environment** ```bash docker compose -f docker-compose-dev.yml build @@ -44,22 +119,22 @@ docker compose -f docker-compose-dev.yml build docker compose -f docker-compose-prod.yml pull ``` -### Start containers +#### Start containers * **To run developer environment** ```bash -docker compose -f docker-compose-dev up -d +docker compose -f docker-compose-dev.yml up -d ``` * **To run production environment** ```bash -docker compose -f docker-compose-prod up -d +docker compose -f docker-compose-prod.yml up -d ``` -### Stop containers +#### Stop containers * **To stop developer environment** ```bash -docker compose -f docker-compose-dev down +docker compose -f docker-compose-dev.yml down ``` * **To stop production environment** ```bash -docker compose -f docker-compose-prod down +docker compose -f docker-compose-prod.yml down ``` \ No newline at end of file diff --git a/run b/run new file mode 100644 index 0000000..bd5a5d4 --- /dev/null +++ b/run @@ -0,0 +1,132 @@ +#!/bin/bash + +######Global var###### +option=$2 +######DONT TOUCH###### + +help(){ + #Display help + echo "Usage: sh run COMMAND [OPTION]" + echo "" + echo "Usage example: sh run dev up" + echo "" + echo "Available Commands:" + echo " setup \t Setting up Environment, Setup-Tools, Building Test-Tool and Fixes Imports" + echo " lint \t Run Lint on Project Files" + echo " test \t Run Test on Project Files" + echo " dev [up/down]\t Build and Start/Stop Docker-compose in DEV Environment" + echo " prod [up/down]\t Build and Start/Stop Docker-compose in PROD Environment" + echo "" + echo "Options for dev & prod" + echo " up \t Start Selected Docker Environment" + echo " down \t Stop Selected Docker Environment" + echo "" + echo "For more information check readme.md" +} + +# Create the virtualenv, install the project, and build the test image. +# Uses '.' rather than 'source' so the venv activation also works when run through plain sh. +setup(){ + python3 -m venv .venv && . 
.venv/bin/activate && pip install -r requirements.txt + python3 -m pip install --editable . + mkdir -p .testEnv + sudo docker compose -f docker-compose-test-image.yml build +} + +# NOTE: '**' only matches recursively when bash's globstar option is enabled; otherwise it behaves like '*'. +lint(){ + pylint ./**/*.py +} + +# Copy the project into .testEnv, which docker-compose-test-image.yml mounts at /project, then run the test container. +tests(){ + mkdir -p .testEnv + sudo cp -r * .testEnv 2> /dev/null + sudo docker compose -f docker-compose-test-image.yml up +} + +# Build and start, or stop, the development compose stack depending on $option. +dev(){ + case $option in + up) + echo "Building images ... This might take some time..." + sudo docker compose -f docker-compose-dev.yml build --quiet + echo "Images Have Been Created" + sudo docker compose -f docker-compose-dev.yml up -d + echo "Containers Created" + ;; + + down) + echo "Stopping Containers" + sudo docker compose -f docker-compose-dev.yml down + echo "Containers Stopped" + ;; + + *) + echo "" + echo "UNKNOWN PARAMETER:" + echo "\"$option\"" + echo "" + echo "USE ONE OPTION PARAMETER:" + echo " - up" + echo " - down" + echo "" + ;; + esac +} + +# Pull and start, or stop, the production compose stack depending on $option. +prod(){ + case $option in + up) + echo "Pulling images ... This might take some time..." + sudo docker compose -f docker-compose-prod.yml pull + echo "Images Have Been Pulled and Built" + sudo docker compose -f docker-compose-prod.yml up -d + echo "Containers Created" + ;; + + down) + echo "Stopping Containers" + sudo docker compose -f docker-compose-prod.yml down + echo "Containers Stopped" + ;; + + *) + echo "" + echo "UNKNOWN PARAMETER:" + echo "\"$option\"" + echo "" + echo "USE ONE OPTION PARAMETER:" + echo " - up" + echo " - down" + echo "" + ;; + esac +} + +case $1 in +setup) + setup + ;; + +lint) + lint + ;; + +test) + tests + ;; + +dev) + dev + ;; + +prod) + prod + ;; + +*) + help + ;; +esac diff --git a/src/file_loading/test/test_file_loader_text_extraction.py b/src/file_loading/test/test_file_loader_text_extraction.py index e114a56..a378dcd 100644 --- a/src/file_loading/test/test_file_loader_text_extraction.py +++ b/src/file_loading/test/test_file_loader_text_extraction.py @@ -4,6 +4,7 @@ import unittest import os +import shutil from file_loading.file_loader import FileLoader from 
text_extraction.text_extractor import TextExtractor @@ -14,9 +15,22 @@ class TestFileLoaderTextExtraction(unittest.TestCase): """ def setUp(self): + """Copy the PDF fixture from test_files/ to a working copy for the test.""" + source = "src/file_loading/test/test_files/PDF_test1.pdf" + destination = "src/file_loading/test/PDF_test1.pdf" + + # Let copy errors propagate: printing and carrying on would only defer + # the failure to the test body, where the cause is harder to see. + shutil.copy(source, destination) + self.pdf_file_path = "src/file_loading/test/PDF_test1.pdf" self.output_folder_file_loader = "/watched/text_extraction/" + "out_0_PDF_test1.png" + def tearDown(self): + """Delete the working copy created by setUp if it still exists.""" + if os.path.exists(self.pdf_file_path): + os.remove(self.pdf_file_path) + def test_file_loader_and_text_extractor_integration(self): """ This method verifies that the FileLoader correctly loads an image file, @@ -41,4 +55,4 @@ def test_file_loader_and_text_extractor_integration(self): output_file_path = text_extractor.out_dir + "out_0_PDF_test1.txt" with open(output_file_path, "r", encoding="utf-8") as output_file: content = output_file.read() - self.assertIn("Word", content) + self.assertIn("word", content) diff --git a/src/file_loading/test/PDF_test1.pdf b/src/file_loading/test/test_files/PDF_test1.pdf similarity index 100% rename from src/file_loading/test/PDF_test1.pdf rename to src/file_loading/test/test_files/PDF_test1.pdf