Skip to content

Commit

Permalink
Merge pull request #19 from Knox-AAU/jc/code-cleanup
Browse files Browse the repository at this point in the history
Jc/code cleanup
  • Loading branch information
JTC2000Official authored Nov 27, 2023
2 parents 64c451d + 1b7d750 commit 3d08cbe
Show file tree
Hide file tree
Showing 13 changed files with 29 additions and 42 deletions.
1 change: 1 addition & 0 deletions .github/workflows/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ jobs:
uses: actions/checkout@v3
with:
fetch-depth: 0
- run: sudo apt-get update
- run: sudo apt-get install poppler-utils
- run: pip install -r requirements.txt
- run: "python3 -m unittest discover -s src -p 'test_*.py'"
10 changes: 5 additions & 5 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
python -m venv .venv && source .venv/bin/activate && pip install -r requirements.txt
```

# If cool boy:
# Helper functions (h file):
To lint:
```bash
python h lint
Expand All @@ -29,19 +29,19 @@ python -m unittest discover -s src -p 'test_*.py'

# Command to setup setuptools and fix imports etc
```bash
python3 -m pip install --editable .
python -m pip install --editable .
```

# Docker compose commands

*Sudo rights may be needed - use: " **sudo {command you want to run}** "*
### Build containers
* **To build developer environment**
```bash
docker compose -f docker-compose-dev.yml build
```
* **To build production environment**
* **To pull production environment**
```bash
docker compose -f docker-compose-prod.yml build
docker compose -f docker-compose-prod.yml pull
```

### Start containers
Expand Down
5 changes: 1 addition & 4 deletions src/file_loading/file_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,10 @@ def removefile(self):
os.remove(self.path + self.extension)

def handle_files(self, read_file):
""" remake me """
""" File handler for moving files to text_extraction folder """
output_folder = "/watched/text_extraction/"
output_file_path = output_folder + "out_" + str(read_file).rsplit('/', maxsplit=1)[-1]

shutil.copy(read_file, output_file_path)

# print(f"shutil copied: {read_file}")
# os.remove(os.path.join(path, name))

print(f"Fileloader moved: {read_file} to {output_file_path}")
2 changes: 1 addition & 1 deletion src/file_loading/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Entrance file for the file loader step of step 1 of the Knox pipeline"""
from file_loading.file_loader import FileLoader
from folder_watcher.folder_watcher import FolderWatcher
from file_loading.file_loader import FileLoader

if __name__ == '__main__':
file_loader = FileLoader()
Expand Down
1 change: 1 addition & 0 deletions src/file_loading/test/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# pylint: skip-file
8 changes: 8 additions & 0 deletions src/file_loading/test/test_file_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,28 @@ class TestCase(unittest.TestCase):
"""Testing the FileLoader class"""
def test_reads_correct_path(self):
""" Test if the readextension function reads the correct path """

# Arrange
file_loader = FileLoader()
file_loader.readextension('src/file_loading/test/test.pdf')

# Act
actual_path = file_loader.path

# Assert
self.assertEqual(actual_path, 'src/file_loading/test/test')


def test_reads_correct_extension(self):
""" Test if the readextension function reads the correct file extension """

# Arrange
file_loader = FileLoader()
file_loader.readextension('src/file_loading/test/test.pdf')

# Act
actual_extension = file_loader.extension

# Assert
self.assertEqual(actual_extension, '.pdf')

Expand All @@ -33,6 +37,7 @@ def test_reads_correct_extension(self):
# TODO: Right now it just reads from printed output :))
def test_opens_pdf(self):
""" Test if the openpdf function correctly opens and reads data """

# Arrange
file_loader = FileLoader()
file_loader.readextension('src/file_loading/test/test.pdf')
Expand All @@ -46,6 +51,7 @@ def test_opens_pdf(self):
file_loader.images[0].mode,
file_loader.images[0].file_name,
]

# Assert
self.assertListEqual(actual, [
'PPM',
Expand All @@ -57,6 +63,7 @@ def test_opens_pdf(self):

def test_opens_image(self):
""" Test if the openimage function correctly opens and reads data """

# Arrange
file_loader = FileLoader()
file_loader.readextension('src/file_loading/test/test.jpg')
Expand All @@ -69,6 +76,7 @@ def test_opens_image(self):
file_loader.images[0].mode,
file_loader.images[0].file_name,
]

# Assert
self.assertListEqual(actual, [
'JPEG',
Expand Down
9 changes: 3 additions & 6 deletions src/folder_watcher/folder_watcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,11 @@ def __init__(self, function_to_run):
self.function_to_run = function_to_run

def on_created(self, event):
# print(event)
if not event.is_directory:
full_path = os.path.join(os.getcwd(), event.src_path)
self.function_to_run(full_path)

def on_modified(self, event):
# print(event)
if not event.is_directory:
full_path = os.path.join(os.getcwd(), event.src_path)
self.function_to_run(full_path)
Expand All @@ -31,22 +29,21 @@ class FolderWatcher:
:param: function_to_run is the function that should be applied to the file that is found.
this function should start by reading the file
"""

def __init__(self, path_to_watch, function_to_run):
self.path_to_watch = path_to_watch
self.function_to_run = function_to_run

def watch(self):
""" This method is a wrapper function that watches the folder and
"""This method is a wrapper function that watches the folder and
applies a function to files in the folder"""

watcher = _Watcher(self.function_to_run)

observer = Observer()
observer.schedule(watcher, path=self.path_to_watch, recursive=True)
observer.start()

# Remove; only for test
print(f"Watching: {self.path_to_watch}\n")

try:
while True:
sleep(1)
Expand Down
2 changes: 1 addition & 1 deletion src/spell_checking/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
""" Entrance file for the spell checking step of step 1 of the Knox pipeline """
from spell_checking.spell_checker import SpellChecker
from folder_watcher.folder_watcher import FolderWatcher
from spell_checking.spell_checker import SpellChecker

if __name__ == '__main__':
spellchecker = SpellChecker("src/spell_checking/wordList.txt")
Expand Down
21 changes: 1 addition & 20 deletions src/spell_checking/spell_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,31 +99,12 @@ def query(self, x):
# Sort the results in reverse order and return
return sorted(self.output, key=lambda x: x[1], reverse=True)

def handle_files_print(self, read_file):
""" Test """

if self.ready is True:
validwords = 0
invalidwords = 0

with open(read_file, 'r', encoding="utf-8") as reading_file:
for line in reading_file.readlines():
for word in line.split(" "):
if len(self.query(word)) > 0:
validwords += 1
else:
invalidwords += 1

print(f"Valid words: {validwords}\nInvalid words: {invalidwords}")

def handle_files(self, read_file):
""" Test """
""" Internal filehandling for spell_checker """
if self.ready is True:
output_folder = "/watched/output/"
output_file_path = output_folder + str(read_file).rsplit('/', maxsplit=1)[-1]

print(output_file_path)

with open(read_file, 'r', encoding="utf-8") as reading_file:
with open(output_file_path, 'w', encoding="utf-8") as output_file:
for line in reading_file.readlines():
Expand Down
5 changes: 4 additions & 1 deletion src/spell_checking/test/test_spell_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,21 @@ class TestCase(unittest.TestCase):
"""Testing the SpellChecker class"""
def test_trie_initializes(self):
""" Test if the Trie is correctly initialized """

# Arrange
sc = SpellChecker()

# Act
# Assert
actual = sc.root.char == ""
self.assertEqual(actual, True)


def test_trie_inserts_correctly(self):
""" Test if the insert function works """

# Arrange
sc = SpellChecker()

# Act
sc.insert('test')
actual = sc.query('t')
Expand All @@ -29,6 +31,7 @@ def test_trie_inserts_correctly(self):

def test_trie_query_correctly(self):
""" Test if the query function works """

# Arrange
sc = SpellChecker()
sc.insert('cat')
Expand Down
2 changes: 1 addition & 1 deletion src/text_extraction/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
""" Entrance file for the text extraction step of step 1 of the Knox pipeline """
from text_extraction.text_extractor import TextExtractor
from folder_watcher.folder_watcher import FolderWatcher
from text_extraction.text_extractor import TextExtractor

if __name__ == '__main__':
text_extractor = TextExtractor()
Expand Down
3 changes: 2 additions & 1 deletion src/text_extraction/test/test_check_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@ class TextExtractionTests(unittest.TestCase):
"""unit testing class"""

def test_compare_text(self):
#Arrange
"""Performs OCR on the test image and extracts the words into an extracted.txt file"""

#Arrange
#generate a txt file called extracted.txt from test.png here
extracted_text = get_word_stream('extracted.txt')
expected_text = get_word_stream('expected.txt')
Expand Down
2 changes: 0 additions & 2 deletions src/text_extraction/text_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,13 @@
import pytesseract
from pdf2image import convert_from_path

# PDF_file = Path(r"./testdata/test5.pdf")
@dataclasses.dataclass
class TextExtractor():
""" Text extraction interface """
def __init__(self):
self.out_dir = "/watched/spell_checking/"
self.dpi = 500
self.image_file_list = []
# self.queue = queue

def read(self, input_file):
""" Inner function that converts and reads PDFs """
Expand Down

0 comments on commit 3d08cbe

Please sign in to comment.