Knox-AAU · JTC2000Official · Dec 7, 2023 · Dec 7, 2023 · Dec 7, 2023 · Dec 7, 2023
diff --git a/.dockerignore b/.dockerignore
@@ -21,4 +21,5 @@ coverage.xml
 .hypothesis
 .gitignore
 h
-*.egg-info
+*.egg-info
+.testEnv
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
 __pycache__/
 .venv
-*.egg-info
+*.egg-info
+.testEnv
diff --git a/Dockerfile_test_image b/Dockerfile_test_image
@@ -0,0 +1,23 @@
+# Image used by docker-compose-test-image.yml to run the project's unit tests.
+FROM python:3-alpine3.18
+
+ENV DIR=/project \
+    TESSDATA_PREFIX=/usr/share/tessdata
+
+# Working directories under /watched, created in a single layer.
+RUN mkdir -p \
+      /watched/file_loader \
+      /watched/output \
+      /watched/spell_checking \
+      /watched/text_extraction
+
+# System packages in one layer; --no-cache keeps the apk index out of the image.
+RUN apk add --no-cache \
+      libmagic \
+      poppler-utils \
+      tesseract-ocr \
+      tesseract-ocr-data-dan \
+      tesseract-ocr-data-eng
+
+WORKDIR ${DIR}
+
+# Install the Python dependencies before copying the sources so this layer
+# stays cached until requirements.txt itself changes.
+COPY requirements.txt ./
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+RUN pip install --no-cache-dir --editable .
diff --git a/docker-compose-test-image.yml b/docker-compose-test-image.yml
@@ -0,0 +1,14 @@
+version: '3.7'
+
+# Single service that runs the unit tests inside the test image.
+services:
+  test_container:
+    container_name: Project_tester
+
+    # Built from Dockerfile_test_image with the repository root as build context.
+    build:
+      context: .
+      dockerfile: Dockerfile_test_image
+
+    # Bind-mount ./.testEnv from the host over /project in the container.
+    volumes:
+      - ./.testEnv:/project
+
+    # List form of: python -m unittest discover -s src -p 'test_*.py'
+    command: ["python", "-m", "unittest", "discover", "-s", "src", "-p", "test_*.py"]
diff --git a/h b/h
diff --git a/readme.md b/readme.md
@@ -1,40 +1,115 @@
-# Activating and setting up virtualenv
+# GET STARTED
+## Requirements
+* **WSL**
+* **Python 3.11+**
+* **Docker**
+
+## Build the project
+1) `Enter WSL (Windows ONLY)`
+    * Open terminal -> type `WSL` -> Hit Enter -> type `cd` -> Hit Enter
+2) `git clone git@github.com:Knox-AAU/Preprocessessing_Text-extraction.git`
+3) `cd Preprocessessing_Text-extraction`
+4) `source run setup`
+
+## Easy shell commands (Custom script)
+
+***How to start dev environment***
 ```bash
-python -m venv .venv && source .venv/bin/activate && pip install -r requirements.txt
+sh run dev up
 ```
 
-# Helper functions (h file):
-To lint: 
+***How to stop dev environment***
 ```bash
-python h lint
+sh run dev down
 ```
-To test: 
+
+***How to lint project***
+```bash
+sh run lint
+```
+
+***How to run project tests***
+```bash
+sh run test
+```
+
+***How to run prod environment (SERVER ONLY)***
 ```bash
-python h test
+sh run prod up
+```
+
+***How to stop prod environment (SERVER ONLY)***
+```bash
+sh run prod down
+```
+
+## How to contribute
+To be able to contribute to this project you will need to fulfill the following requirements:
+* **Branching**
+    * *To begin your contribution you have to branch out directly from main. Remember to pull the newest version before branching out. When you're done with the branch, create a pull request and get it approved by another person working on the project.*
+    * To make a new branch directly from the terminal, you can use the following commands:
+    * ``git pull``
+    * ``git checkout -b {branchName}`` (e.g. **git checkout -b jc/new-branch-name**)
+    * ``git add {files}``
+    * ``git commit -m {comment about changes}``
+    * ``git push origin {branchName}`` (e.g. **git push origin jc/new-branch-name**)
+* **Pull_requests**
+    * At least one person is required to review changes
+    * When a pull request is created, the workflow starts running - checking the code structure with a linter, and checking that unit tests and other tests pass
+        * If the workflow fails, merging is blocked until it is fixed
+* **Workflow**
+    * The workflow consists of 3 steps, where the last step is divided into 3 parts
+        * Linter - Ensures good structure and readable code
+        * Unittest - Built-in testing module, ensuring integrity and validation of modules
+        * Deployment - Creates production packages that are pulled on the server. Deployment creates three packages, one for each step in text-extraction. To run deployment, the production branch (main) needs to be tagged before the workflow constructs the packages.
+
+## Deployment
+* **How to tag production**
+    * A new production version can be tagged from the terminal
+        * ``git tag {version} {branchName}`` (e.g. **git tag 1.2 main**)
+* **How to deploy new version**
+    * After tagging the next production package, it can be pulled on the server
+    * Connect to AAU VPN
+    * Ssh into preproc01 `ssh <STUDENT_MAIL>@knox-preproc01.srv.aau.dk`
+    * Two options:
+        * Git clone project and use `sh run prod up`
+        * ``sudo docker compose -f docker-compose-prod.yml pull && docker compose -f docker-compose-prod.yml up``
+    * Watchtower will pull new versions in the future
+
+---
+---
+---
+---
+---
+
+# Advanced/Detailed commands for project
+## Activating and setting up virtualenv
+```bash
+python -m venv .venv && source .venv/bin/activate && pip install -r requirements.txt
 ```
 
-# To lint code
+## To lint code
 1) Be in folder with files you want to lint (usually root)
 2) 
 ```bash
 pylint ./**/*.py
 ```
 
-# To run tests
+## To run tests
 1) Be in root folder 
 2) 
 ```bash
 python -m unittest discover -s src -p 'test_*.py'
 ```
 
-# Command to setup setuptools and fix imports etc
+## Command to setup setuptools and fix imports etc
 ```bash
 python -m pip install --editable .
 ```
 
-# Docker compose commands
+## Docker compose commands
 *Sudo rights may be needed - use: " **sudo {command you want to run}** "*
-### Build containers
+#### Build containers
 * **To build developer environment**
 ```bash
 docker compose -f docker-compose-dev.yml build
@@ -44,22 +119,22 @@ docker compose -f docker-compose-dev.yml build
 docker compose -f docker-compose-prod.yml pull
 ```
 
-### Start containers
+#### Start containers
 * **To run developer environment**
 ```bash
-docker compose -f docker-compose-dev up -d
+docker compose -f docker-compose-dev.yml up -d
 ```
 * **To run production environment**
 ```bash
-docker compose -f docker-compose-prod up -d
+docker compose -f docker-compose-prod.yml up -d
 ```
 
-### Stop containers
+#### Stop containers
 * **To stop developer environment**
 ```bash
-docker compose -f docker-compose-dev down
+docker compose -f docker-compose-dev.yml down
 ```
 * **To stop production environment**
 ```bash
-docker compose -f docker-compose-prod down
+docker compose -f docker-compose-prod.yml down
 ```
diff --git a/run b/run
@@ -0,0 +1,125 @@
+#!/bin/bash
+
+######Global var######
+option=$2
+######DONT TOUCH######
+
+help(){
+    # Print the usage text for this script.
+    # Lines containing "\t" use printf: echo only expands that escape in some
+    # shells (e.g. dash), while bash's builtin echo prints it literally.
+    echo "Usage: sh run COMMAND [OPTION]"
+    echo ""
+    echo "Usage example: sh run dev up"
+    echo ""
+    echo "Available Commands:"
+    printf '  setup         \t Setting up Environment, Setup-Tools, Building Test-Tool and Fixes Imports\n'
+    printf '  lint          \t Run Lint on Project Files\n'
+    printf '  test          \t Run Test on Project Files\n'
+    printf '  dev  [up/down]\t Build and Start/Stop Docker-compose in DEV Environment\n'
+    printf '  prod [up/down]\t Build and Start/Stop Docker-compose in PROD Environment\n'
+    echo ""
+    echo "Options for dev & prod"
+    printf '  up   \t Start Selected Docker Environment\n'
+    printf '  down \t Stop Selected Docker Environment\n'
+    echo ""
+    echo "For more information check readme.md"
+}
+
+setup(){
+    # Prepare a local development environment and build the test image.
+    # "." is the POSIX spelling of "source", so activation also works when the
+    # script is started as `sh run setup` and sh is not bash.
+    python3 -m venv .venv && . .venv/bin/activate && pip install -r requirements.txt
+    # Editable install of the project itself so its packages can be imported.
+    python3 -m pip install --editable .
+    # -p: do not fail when setup is re-run and the directory already exists.
+    mkdir -p .testEnv
+    sudo docker compose -f docker-compose-test-image.yml build
+}
+
+lint(){
+    # Run pylint on every Python file below the current directory.
+    # A "./**/*.py" glob only recurses when bash's globstar option is enabled;
+    # otherwise "**" behaves like "*" and only files exactly one directory deep
+    # are matched. find recurses in any shell. Hidden files and directories
+    # (.venv, .testEnv, .git, ...) are pruned; the glob never matched them either.
+    find . -name '.?*' -prune -o -name '*.py' -exec pylint {} +
+}
+
+tests(){
+    # Run the unit tests inside the test container.
+    # .testEnv is the host directory docker-compose-test-image.yml mounts at
+    # /project; make sure it exists, then refresh it with the current sources.
+    mkdir -p .testEnv
+    sudo cp -r * .testEnv 2> /dev/null
+    # --exit-code-from makes "up" return the test container's exit status, so a
+    # failing test run is visible to the caller of this script.
+    sudo docker compose -f docker-compose-test-image.yml up --exit-code-from test_container
+}
+
+dev(){
+    # Start or stop the DEV docker-compose environment, selected by the global
+    # $option (second CLI argument): "up" builds and starts, "down" stops.
+    # Returns non-zero when a docker command fails or the option is unknown.
+    case "$option" in
+    up)
+        echo "Building images ... This might take some time..."
+        # Stop on failure instead of printing the success message anyway.
+        sudo docker compose -f docker-compose-dev.yml build --quiet || return 1
+        echo "Images Have Been Created"
+        sudo docker compose -f docker-compose-dev.yml up -d || return 1
+        echo "Containers Created"
+    ;;
+
+    down)
+        echo "Stopping Containers"
+        sudo docker compose -f docker-compose-dev.yml down || return 1
+        echo "Containers Stopped"
+    ;;
+
+    *)
+        echo ""
+        echo "UNKNOWN PARAMETER:"
+        # Quoted argument: the value is printed verbatim, without word
+        # splitting or glob expansion.
+        printf '"%s"\n' "$option"
+        echo ""
+        echo "USE ONE OPTION PARAMETER:"
+        echo "  - up"
+        echo "  - down"
+        echo ""
+        return 1
+    ;;
+    esac
+}
+
+prod(){
+    # Start or stop the PROD docker-compose environment, selected by the global
+    # $option (second CLI argument): "up" pulls and starts, "down" stops.
+    # Returns non-zero when a docker command fails or the option is unknown.
+    case "$option" in
+    up)
+        echo "Pulling images ... This might take some time..."
+        # Stop on failure instead of printing the success message anyway.
+        sudo docker compose -f docker-compose-prod.yml pull || return 1
+        echo "Images Have Been Pulled and Built"
+        sudo docker compose -f docker-compose-prod.yml up -d || return 1
+        echo "Containers Created"
+    ;;
+
+    down)
+        echo "Stopping Containers"
+        sudo docker compose -f docker-compose-prod.yml down || return 1
+        echo "Containers Stopped"
+    ;;
+
+    *)
+        echo ""
+        echo "UNKNOWN PARAMETER:"
+        # Quoted argument: the value is printed verbatim, without word
+        # splitting or glob expansion.
+        printf '"%s"\n' "$option"
+        echo ""
+        echo "USE ONE OPTION PARAMETER:"
+        echo "  - up"
+        echo "  - down"
+        echo ""
+        return 1
+    ;;
+    esac
+}
+
+case $1 in
+setup)
+    setup
+    ;;
+
+lint)
+    lint
+    ;;
+
+test)
+    tests
+    ;;
+
+dev)
+    dev
+    ;;
+
+prod)
+    prod
+    ;;
+
+*)
+    help
+    ;;
+esac
diff --git a/src/file_loading/test/test_file_loader_text_extraction.py b/src/file_loading/test/test_file_loader_text_extraction.py
@@ -4,6 +4,7 @@
 
 import unittest
 import os
+import shutil
 from file_loading.file_loader import FileLoader
 from text_extraction.text_extractor import TextExtractor
 
@@ -14,9 +15,24 @@ class TestFileLoaderTextExtraction(unittest.TestCase):
     """
 
     def setUp(self):
+        source = "src/file_loading/test/test_files/PDF_test1.pdf"
+        destination = "src/file_loading/test/PDF_test1.pdf"
+
+        try:
+            shutil.copy(source, destination)
+            print("File copied successfully.")
+        except shutil.SameFileError:
+            print("Source and destination represents the same file.")
+        except PermissionError:
+            print("Permission denied.")
+
         self.pdf_file_path = "src/file_loading/test/PDF_test1.pdf"
         self.output_folder_file_loader = "/watched/text_extraction/" + "out_0_PDF_test1.png"
 
+    def tearDown(self):
+        if os.path.exists(self.pdf_file_path):
+            os.remove(self.pdf_file_path)
+
     def test_file_loader_and_text_extractor_integration(self):
         """
         This method verifies that the FileLoader correctly loads an image file,
@@ -41,4 +57,5 @@ def test_file_loader_and_text_extractor_integration(self):
         output_file_path = text_extractor.out_dir + "out_0_PDF_test1.txt"
         with open(output_file_path, "r", encoding="utf-8") as output_file:
             content = output_file.read()
-            self.assertIn("Word", content)
+            self.assertIn("word", content)
+
diff --git a/src/file_loading/test/PDF_test1.pdf → ...ile_loading/test/test_files/PDF_test1.pdf b/src/file_loading/test/PDF_test1.pdf → ...ile_loading/test/test_files/PDF_test1.pdf