From 2ae20bf036a96cc020ef7bae8af76a966a824440 Mon Sep 17 00:00:00 2001 From: JTC2000Official Date: Thu, 7 Dec 2023 09:38:09 +0100 Subject: [PATCH 1/8] fixed typo in readme --- readme.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/readme.md b/readme.md index 78a4eae..82c3aa4 100644 --- a/readme.md +++ b/readme.md @@ -47,19 +47,19 @@ docker compose -f docker-compose-prod.yml pull ### Start containers * **To run developer environment** ```bash -docker compose -f docker-compose-dev up -d +docker compose -f docker-compose-dev.yml up -d ``` * **To run production environment** ```bash -docker compose -f docker-compose-prod up -d +docker compose -f docker-compose-prod.yml up -d ``` ### Stop containers * **To stop developer environment** ```bash -docker compose -f docker-compose-dev down +docker compose -f docker-compose-dev.yml down ``` * **To stop production environment** ```bash -docker compose -f docker-compose-prod down +docker compose -f docker-compose-prod.yml down ``` \ No newline at end of file From ebc11f9c2406b69bb253b8e4c2e2bc92492975e0 Mon Sep 17 00:00:00 2001 From: Christian Date: Thu, 7 Dec 2023 12:44:17 +0100 Subject: [PATCH 2/8] Fixed test file delete --- .../test/test_file_loader_text_extraction.py | 21 +++++++++++++++++- .../test/{ => test_files}/PDF_test1.pdf | Bin 2 files changed, 20 insertions(+), 1 deletion(-) rename src/file_loading/test/{ => test_files}/PDF_test1.pdf (100%) diff --git a/src/file_loading/test/test_file_loader_text_extraction.py b/src/file_loading/test/test_file_loader_text_extraction.py index e114a56..33ca41b 100644 --- a/src/file_loading/test/test_file_loader_text_extraction.py +++ b/src/file_loading/test/test_file_loader_text_extraction.py @@ -4,6 +4,7 @@ import unittest import os +import shutil from file_loading.file_loader import FileLoader from text_extraction.text_extractor import TextExtractor @@ -14,8 +15,25 @@ class TestFileLoaderTextExtraction(unittest.TestCase): """ def setUp(self): + source = 
"src/file_loading/test/test_files/PDF_test1.PDF" + destination = "src/file_loading/test/PDF_test1.pdf" + + try: + shutil.copy(source, destination) + print("File copied successfully.") + except shutil.SameFileError: + print("Source and destination represents the same file.") + except PermissionError: + print("Permission denied.") + except: + print("Error occurred while copying file.") + self.pdf_file_path = "src/file_loading/test/PDF_test1.pdf" self.output_folder_file_loader = "/watched/text_extraction/" + "out_0_PDF_test1.png" + + def tearDown(self): + if os.path.exists(self.pdf_file_path): + os.remove(self.pdf_file_path) def test_file_loader_and_text_extractor_integration(self): """ @@ -41,4 +59,5 @@ def test_file_loader_and_text_extractor_integration(self): output_file_path = text_extractor.out_dir + "out_0_PDF_test1.txt" with open(output_file_path, "r", encoding="utf-8") as output_file: content = output_file.read() - self.assertIn("Word", content) + self.assertIn("word", content) + \ No newline at end of file diff --git a/src/file_loading/test/PDF_test1.pdf b/src/file_loading/test/test_files/PDF_test1.pdf similarity index 100% rename from src/file_loading/test/PDF_test1.pdf rename to src/file_loading/test/test_files/PDF_test1.pdf From 08c8238606dbe4fcd4eb1018b9c392d5d434c87b Mon Sep 17 00:00:00 2001 From: Christian Date: Thu, 7 Dec 2023 12:51:24 +0100 Subject: [PATCH 3/8] Fixed lint --- .../test/test_file_loader_text_extraction.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/file_loading/test/test_file_loader_text_extraction.py b/src/file_loading/test/test_file_loader_text_extraction.py index 33ca41b..a378dcd 100644 --- a/src/file_loading/test/test_file_loader_text_extraction.py +++ b/src/file_loading/test/test_file_loader_text_extraction.py @@ -15,9 +15,9 @@ class TestFileLoaderTextExtraction(unittest.TestCase): """ def setUp(self): - source = "src/file_loading/test/test_files/PDF_test1.PDF" + source = 
"src/file_loading/test/test_files/PDF_test1.pdf" destination = "src/file_loading/test/PDF_test1.pdf" - + try: shutil.copy(source, destination) print("File copied successfully.") @@ -25,12 +25,10 @@ def setUp(self): print("Source and destination represents the same file.") except PermissionError: print("Permission denied.") - except: - print("Error occurred while copying file.") - + self.pdf_file_path = "src/file_loading/test/PDF_test1.pdf" self.output_folder_file_loader = "/watched/text_extraction/" + "out_0_PDF_test1.png" - + def tearDown(self): if os.path.exists(self.pdf_file_path): os.remove(self.pdf_file_path) From 1b8c3afaab8bc4c7d06de647af7df441803d7f7f Mon Sep 17 00:00:00 2001 From: JTC2000Official Date: Thu, 7 Dec 2023 13:24:45 +0100 Subject: [PATCH 4/8] init for improved documentation --- Dockerfile_test_image | 25 +++++++++ docker-compose-test-image.yml | 12 +++++ run | 95 +++++++++++++++++++++++++++++++++++ 3 files changed, 132 insertions(+) create mode 100644 Dockerfile_test_image create mode 100644 docker-compose-test-image.yml create mode 100644 run diff --git a/Dockerfile_test_image b/Dockerfile_test_image new file mode 100644 index 0000000..53f0d96 --- /dev/null +++ b/Dockerfile_test_image @@ -0,0 +1,25 @@ +FROM python:3-alpine3.18 + +ENV DIR=project +ENV TESSDATA_PREFIX=/usr/share/tessdata + +RUN mkdir file_loading +RUN mkdir spell_checking +RUN mkdir text_extraction + +RUN apk add poppler-utils +RUN apk add libmagic +RUN apk add poppler-utils +RUN apk add poppler-utils +RUN apk add tesseract-ocr +RUN apk add tesseract-ocr-data-dan +RUN apk add tesseract-ocr-data-eng + +WORKDIR /${DIR}/ + +COPY . ${DIR}/testing + +RUN pip install -r requirements.txt +RUN pip install --editable . 
+ +CMD [ "python -m unittest discover -s src -p 'test_*.py'" ] \ No newline at end of file diff --git a/docker-compose-test-image.yml b/docker-compose-test-image.yml new file mode 100644 index 0000000..36207b5 --- /dev/null +++ b/docker-compose-test-image.yml @@ -0,0 +1,12 @@ +version: '3.7' + +services: + test_container: + build: + context: . + dockerfile: Dockerfile_test_image + + container_name: Project_tester + + volumes: + - .:/project/ diff --git a/run b/run new file mode 100644 index 0000000..5034a08 --- /dev/null +++ b/run @@ -0,0 +1,95 @@ +#!/bin/bash +help(){ + #Display help + echo "Usage: sh run COMMAND [OPTION]" + echo "" + echo "Usage example: sh run dev up" + echo "" + echo "Available Commands:" + echo " setup \t Setting up Environment, Setup-Tools and Fixes Imports" + echo " lint \t Run Lint on Project Files" + echo " test \t Run Test on Project Files" + echo " dev [up/down]\t Build and Start/Stop Docker-compose in DEV Environment" + echo " prod [up/down]\t Build and Start/Stop Docker-compose in PROD Environment" + echo "" + echo "Options for dev & prod" + echo " up \t Start Selected Docker Environment" + echo " down \t Stop Selected Docker Environment" + echo "" + echo "For more information check readme.md" +} + +setup(){ + python -m venv .venv && source .venv/bin/activate && pip install -r requirements.txt + python -m pip install --editable . +} + +lint(){ + pylint ./**/*.py +} + +tests(){ ##FIX MANGLER NY CONTAINER + python -m unittest discover -s src -p 'test_*.py' +} + +dev(){ + case $2 in + up) + echo "Building images ... This might take some time..." + sudo docker compose -f docker-compose-dev.yml build --quiet + echo "Images Have Been Created" + docker compose -f docker-compose-dev.yml up -d + echo "Containers Created" + ;; + + down) + echo "Stopping Containers" + docker compose -f docker-compose-dev.yml down + echo "Containers Stopped" + ;; + esac +} + +prod(){ + case $2 in + up) + echo "Pulling images ... This might take some time..." 
+ sudo docker compose -f docker-compose-prod.yml pull + echo "Images Have Been Pulled and Built" + sudo docker compose -f docker-compose-prod.yml up -d + echo "Containers Created" + ;; + + down) + echo "Stopping Containers" + docker compose -f docker-compose-prod.yml down + echo "Containers Stopped" + ;; + esac +} + +case $1 in +setup) + setup + ;; + +lint) + lint + ;; + +test) + tests + ;; + +dev) + dev + ;; + +prod) + prod + ;; + +*) + help + ;; +esac From f09254b9626349e7c74c10b38727098593bab1d9 Mon Sep 17 00:00:00 2001 From: JTC2000Official Date: Thu, 7 Dec 2023 14:59:28 +0100 Subject: [PATCH 5/8] Added easy shell command to run project development --- .dockerignore | 3 ++- .gitignore | 3 ++- Dockerfile_test_image | 18 ++++++------- docker-compose-test-image.yml | 4 ++- run | 50 ++++++++++++++++++++++++++++------- 5 files changed, 55 insertions(+), 23 deletions(-) diff --git a/.dockerignore b/.dockerignore index e935998..d94de03 100644 --- a/.dockerignore +++ b/.dockerignore @@ -21,4 +21,5 @@ coverage.xml .hypothesis .gitignore h -*.egg-info \ No newline at end of file +*.egg-info +.testEnv \ No newline at end of file diff --git a/.gitignore b/.gitignore index c4c1891..d3c99cc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ __pycache__/ .venv -*.egg-info \ No newline at end of file +*.egg-info +.testEnv \ No newline at end of file diff --git a/Dockerfile_test_image b/Dockerfile_test_image index 53f0d96..f5ce49d 100644 --- a/Dockerfile_test_image +++ b/Dockerfile_test_image @@ -1,25 +1,23 @@ FROM python:3-alpine3.18 -ENV DIR=project +ENV DIR=/project ENV TESSDATA_PREFIX=/usr/share/tessdata -RUN mkdir file_loading -RUN mkdir spell_checking -RUN mkdir text_extraction +RUN mkdir /watched +RUN mkdir /watched/text_extraction +RUN mkdir /watched/spell_checking +RUN mkdir /watched/output +RUN mkdir /watched/file_loader RUN apk add poppler-utils RUN apk add libmagic -RUN apk add poppler-utils -RUN apk add poppler-utils RUN apk add tesseract-ocr RUN apk add 
tesseract-ocr-data-dan RUN apk add tesseract-ocr-data-eng -WORKDIR /${DIR}/ +COPY . ${DIR} -COPY . ${DIR}/testing +WORKDIR ${DIR} RUN pip install -r requirements.txt RUN pip install --editable . - -CMD [ "python -m unittest discover -s src -p 'test_*.py'" ] \ No newline at end of file diff --git a/docker-compose-test-image.yml b/docker-compose-test-image.yml index 36207b5..17a1b1d 100644 --- a/docker-compose-test-image.yml +++ b/docker-compose-test-image.yml @@ -8,5 +8,7 @@ services: container_name: Project_tester + command: python -m unittest discover -s src -p 'test_*.py' + volumes: - - .:/project/ + - ./.testEnv:/project diff --git a/run b/run index 5034a08..5db14e4 100644 --- a/run +++ b/run @@ -1,4 +1,9 @@ #!/bin/bash + +######Global var###### +option=$2 +######DONT TOUCH###### + help(){ #Display help echo "Usage: sh run COMMAND [OPTION]" @@ -6,7 +11,7 @@ help(){ echo "Usage example: sh run dev up" echo "" echo "Available Commands:" - echo " setup \t Setting up Environment, Setup-Tools and Fixes Imports" + echo " setup \t Setting up Environment, Setup-Tools, Building Test-Tool and Fixes Imports" echo " lint \t Run Lint on Project Files" echo " test \t Run Test on Project Files" echo " dev [up/down]\t Build and Start/Stop Docker-compose in DEV Environment" @@ -22,24 +27,27 @@ help(){ setup(){ python -m venv .venv && source .venv/bin/activate && pip install -r requirements.txt python -m pip install --editable . + mkdir .testEnv + sudo docker compose -f docker-compose-test-image.yml build } lint(){ pylint ./**/*.py } -tests(){ ##FIX MANGLER NY CONTAINER - python -m unittest discover -s src -p 'test_*.py' +tests(){ + cp -r * .testEnv 2> /dev/null + sudo docker compose -f docker-compose-test-image.yml up } dev(){ - case $2 in + case $option in up) - echo "Building images ... This might take some time..." 
- sudo docker compose -f docker-compose-dev.yml build --quiet - echo "Images Have Been Created" - docker compose -f docker-compose-dev.yml up -d - echo "Containers Created" + echo "Building images ... This might take some time..." + sudo docker compose -f docker-compose-dev.yml build --quiet + echo "Images Have Been Created" + docker compose -f docker-compose-dev.yml up -d + echo "Containers Created" ;; down) @@ -47,11 +55,22 @@ dev(){ docker compose -f docker-compose-dev.yml down echo "Containers Stopped" ;; + + *) + echo "" + echo "UNKNOWN PARAMETER:" + echo \"$option\" + echo "" + echo "USE ONE OPTION PARAMETER:" + echo " - up" + echo " - down" + echo "" + ;; esac } prod(){ - case $2 in + case $option in up) echo "Pulling images ... This might take some time..." sudo docker compose -f docker-compose-prod.yml pull @@ -65,6 +84,17 @@ prod(){ docker compose -f docker-compose-prod.yml down echo "Containers Stopped" ;; + + *) + echo "" + echo "UNKNOWN PARAMETER:" + echo \"$option\" + echo "" + echo "USE ONE OPTION PARAMETER:" + echo " - up" + echo " - down" + echo "" + ;; esac } From d099cd3401c18443dada67544d64111f60279ef6 Mon Sep 17 00:00:00 2001 From: JTC2000Official Date: Thu, 7 Dec 2023 15:39:17 +0100 Subject: [PATCH 6/8] added run options --- run | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/run b/run index 5db14e4..aca9c51 100644 --- a/run +++ b/run @@ -25,8 +25,8 @@ help(){ } setup(){ - python -m venv .venv && source .venv/bin/activate && pip install -r requirements.txt - python -m pip install --editable . + python3 -m venv .venv && source .venv/bin/activate && pip install -r requirements.txt + python3 -m pip install --editable . 
mkdir .testEnv sudo docker compose -f docker-compose-test-image.yml build } From 4123a7bdeb0355c91e9b28aeeee0b8586774d950 Mon Sep 17 00:00:00 2001 From: JTC2000Official Date: Thu, 7 Dec 2023 16:07:52 +0100 Subject: [PATCH 7/8] removed h file + removed h info from readme + fixed run file --- h | 13 ---------- readme.md | 71 ++++++++++++++++++++++++++++++++++++++++++++----------- run | 8 +++---- 3 files changed, 61 insertions(+), 31 deletions(-) delete mode 100644 h diff --git a/h b/h deleted file mode 100644 index 98102c2..0000000 --- a/h +++ /dev/null @@ -1,13 +0,0 @@ -""" Helper tool for improved programming experience """ - -import os -import sys - -if sys.argv[1].lower() == 'lint': - os.system('pylint ./**/*.py') - -elif sys.argv[1].lower() == 'test': - os.system("python -m unittest discover -s src -p 'test_*.py'") - -else: - print(f"Invalid command: {sys.argv[1]} - valid commands are: test & lint") diff --git a/readme.md b/readme.md index 82c3aa4..01606f2 100644 --- a/readme.md +++ b/readme.md @@ -1,40 +1,83 @@ -# Activating and setting up virtualenv +# GET STARTED +## Requirement +* **WSL** +* **Python 3.11+** +* **Docker** + +## Build the project +1) `Enter WSL (Windows ONLY)` + * Open terminal -> type `WSL` -> Hit Enter -> type `cd` -> Hit Enter +2) `git clone git@github.com:Knox-AAU/Preprocessessing_Text-extraction.git` +3) `cd Preprocessessing_Text-extraction` +4) `source run setup` + +## How to start dev environment ```bash -python -m venv .venv && source .venv/bin/activate && pip install -r requirements.txt +sh run dev up +``` + +## How to stop dev environment +```bash +sh run dev down ``` -# Helper functions (h file): -To lint: +## How to lint project ```bash -python h lint +sh run lint ``` -To test: + +## How to run project tests +```bash +sh run test +``` + +## How to run prod environment (SERVER ONLY) ```bash -python h test +sh run prod up +``` + +## How to stop prod environment (SERVER ONLY) +```bash +sh run prod down +``` + +## How to contribute +* 
**Branching** +* **Pull_requests** +* **Workflow** + +## Deployment +* **How to deploy new version** +* **How to tag production** + +# Advanced/Detailed commands for project +## Activating and setting up virtualenv +```bash +python -m venv .venv && source .venv/bin/activate && pip install -r requirements.txt ``` -# To lint code +## To lint code 1) Be in folder with files you want to lint (usually root) 2) ```bash pylint ./**/*.py ``` -# To run tests +## To run tests 1) Be in root folder 2) ```bash python -m unittest discover -s src -p 'test_*.py' ``` -# Command to setup setuptools and fix imports etc +## Command to setup setuptools and fix imports etc ```bash python -m pip install --editable . ``` -# Docker compose commands +## Docker compose commands *Sudo rights may be needed - use: " **sudo {command you want to run}** "* -### Build containers +#### Build containers * **To build developer environment** ```bash docker compose -f docker-compose-dev.yml build @@ -44,7 +87,7 @@ docker compose -f docker-compose-dev.yml build docker compose -f docker-compose-prod.yml pull ``` -### Start containers +#### Start containers * **To run developer environment** ```bash docker compose -f docker-compose-dev.yml up -d @@ -54,7 +97,7 @@ docker compose -f docker-compose-dev.yml up -d docker compose -f docker-compose-prod.yml up -d ``` -### Stop containers +#### Stop containers * **To stop developer environment** ```bash docker compose -f docker-compose-dev.yml down diff --git a/run b/run index aca9c51..bd5a5d4 100644 --- a/run +++ b/run @@ -36,7 +36,7 @@ lint(){ } tests(){ - cp -r * .testEnv 2> /dev/null + sudo cp -r * .testEnv 2> /dev/null sudo docker compose -f docker-compose-test-image.yml up } @@ -46,13 +46,13 @@ dev(){ echo "Building images ... This might take some time..." 
sudo docker compose -f docker-compose-dev.yml build --quiet echo "Images Have Been Created" - docker compose -f docker-compose-dev.yml up -d + sudo docker compose -f docker-compose-dev.yml up -d echo "Containers Created" ;; down) echo "Stopping Containers" - docker compose -f docker-compose-dev.yml down + sudo docker compose -f docker-compose-dev.yml down echo "Containers Stopped" ;; @@ -81,7 +81,7 @@ prod(){ down) echo "Stopping Containers" - docker compose -f docker-compose-prod.yml down + sudo docker compose -f docker-compose-prod.yml down echo "Containers Stopped" ;; From 8021b327f74c2627161a4f92d145721ba01f212a Mon Sep 17 00:00:00 2001 From: JTC2000Official Date: Thu, 7 Dec 2023 16:48:19 +0100 Subject: [PATCH 8/8] updated readme done --- readme.md | 46 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/readme.md b/readme.md index 01606f2..0644016 100644 --- a/readme.md +++ b/readme.md @@ -11,44 +11,76 @@ 3) `cd Preprocessessing_Text-extraction` 4) `source run setup` -## How to start dev environment +## Easy shell commands (Custom script) + +***How to start dev environment*** ```bash sh run dev up ``` -## How to stop dev environment +***How to stop dev environment*** ```bash sh run dev down ``` -## How to lint project +***How to lint project*** ```bash sh run lint ``` -## How to run project tests +***How to run project tests*** ```bash sh run test ``` -## How to run prod environment (SERVER ONLY) +***How to run prod environment (SERVER ONLY)*** ```bash sh run prod up ``` -## How to stop prod environment (SERVER ONLY) +***How to stop prod environment (SERVER ONLY)*** ```bash sh run prod down ``` ## How to contribute +To be able to contribute to this project you will need to fulfill the following requirements: * **Branching** + * *To begin your contribution you have to branch out directly from main. Remember to pull the newest version before branching out. 
When you're done with the branch, you create a pull request and get it approved by another person working on the project.* + * To make a new branch directly from the terminal, you can use the following commands: + * ``git pull`` + * ``git checkout -b {branchName}`` (e.g. **git checkout -b jc/new-branch-name**) + * ``git add {files}`` + * ``git commit -m {comment about changes}`` + * ``git push origin {branchName}`` (e.g. **git push origin jc/new-branch-name**) * **Pull_requests** + * At least one person is required to review changes + * When a pull request is created, the workflow starts running - checking code structure using a linter, and checking that unittests and other tests pass + * If the workflow fails, merging is blocked until it is fixed * **Workflow** + * The workflow is built from 3 steps, where the last step is divided into 3 parts + * Linter - Ensures good structure and readable code + * Unittest - Built-in testing module, ensuring integrity and validation of modules + * Deployment - Creates production packages that are pulled on the server. Deployment creates three packages, one for each step in text-extraction. To run deployment, the production branch (main) needs to be tagged before the workflow constructs the packages. ## Deployment -* **How to deploy new version** * **How to tag production** + * Tagging the new production version can be done through the terminal + * ``git tag {version} {branchName}`` (e.g. **git tag 1.2 main**) +* **How to deploy new version** + * After tagging the next production package, it is possible to pull it from the server + * Connect to AAU VPN + * SSH into preproc01 `ssh {username}@knox-preproc01.srv.aau.dk` + * Two options: + * Git clone project and use `sh run prod up` + * ``sudo docker compose -f docker-compose-prod.yml pull`` && ``docker compose -f docker-compose-prod.yml up`` + * Watchtower will pull new versions in the future + +--- +--- +--- +--- +--- # Advanced/Detailed commands for project ## Activating and setting up virtualenv