Knox-AAU · Darth-Sand · Dec 9, 2023 · Dec 9, 2023 · Dec 9, 2023 · Dec 9, 2023
diff --git a/src/spell_checking/test/Test_File.jpg → ...ll_checking/test/test_files/Test_File.jpg b/src/spell_checking/test/Test_File.jpg → ...ll_checking/test/test_files/Test_File.jpg
diff --git a/src/spell_checking/test/expected.txt → ...ell_checking/test/test_files/expected.txt b/src/spell_checking/test/expected.txt → ...ell_checking/test/test_files/expected.txt
diff --git a/src/spell_checking/test/test_integration_spellchecker.py b/src/spell_checking/test/test_integration_spellchecker.py
@@ -1,6 +1,7 @@
 """Module providing functionality needed to run the integration test"""
 import unittest
 import os
+import shutil
 from spell_checking.spell_checker import SpellChecker
 from text_extraction.text_extractor import TextExtractor
 
@@ -10,22 +11,32 @@ class SpellcheckerIntegrationTests(unittest.TestCase):
     def test_integration_spellchecker(self):
         """Method testing if the output of the text extractor can be received by the spellchecker"""
         #Arrange
+        src = "src/spell_checking/test/test_files/Test_File.jpg"
+        dst = "src/spell_checking/test/Test_File.jpg"
+        ground_truth = "src/spell_checking/test/test_files/expected.txt"
         text_extractor = TextExtractor()
         text_extractor.out_dir = "/watched/spell_checking/"
         spellchecker = SpellChecker("src/spell_checking/wordList.txt")
         spellchecker.out_dir = "/watched/output"
-        with open("src/spell_checking/test/expected.txt", 'r', encoding="utf-8") as expected_text:
+        #read expected.txt into a list of lowercased words
+        with open(ground_truth, 'r', encoding="utf-8") as expected_text:
             expected_text = expected_text.read().lower().split()
             print(f'Expected text: {expected_text}')
+        #copies test file since text extractor deletes files once processed
+        shutil.copy(src, dst)
 
         #Act
+        #if a txt file has already been processed do not run extraction again
         if not os.path.exists("/watched/output/Test_File.txt"):
             text_extractor.read("src/spell_checking/test/Test_File.jpg")
             spellchecker.handle_files("/watched/spell_checking/Test_File.txt")
         with open("/watched/output/Test_File.txt", 'r', encoding="utf-8") as output:
             output = output.read().lower().split()
             print(f'Spellchecked text: {output}')
             status = bool(output == expected_text)
+        #deletes the testing file that was copied over
+        if os.path.exists("src/spell_checking/test/Test_File.jpg"):
+            os.remove("src/spell_checking/test/Test_File.jpg")
 
         #Assert
         self.assertTrue(status, "The text was not extracted correctly")

diff --git a/src/text_extraction/test/__init__.py b/src/text_extraction/test/__init__.py
@@ -0,0 +1 @@
+# pylint: skip-file
diff --git a/src/text_extraction/test/expected.txt b/src/text_extraction/test/expected.txt
diff --git a/src/text_extraction/test/extracted.txt b/src/text_extraction/test/extracted.txt
@@ -1,11 +1,2 @@
-Test of Text Extraction
-
-KNOX Group 20
-October 2023
-
-1 Introduction
-
-Hello World This is just some random text to determine if the text extractor
-generally has the ability to extract text from the PDF accurately.
-
-Here is a separated bit of text.
+Test file
+
diff --git a/src/text_extraction/test/test.png b/src/text_extraction/test/test.png
diff --git a/src/text_extraction/test/test_check_text.py b/src/text_extraction/test/test_check_text.py
@@ -1,10 +1,13 @@
 """provides unit test functionality"""
 import unittest
+import shutil
+import os
+from text_extraction.text_extractor import TextExtractor
 
 def get_word_stream(path):
     """reads the file at path and splits its text into individual words for comparison"""
     with open(path, encoding='utf8', mode='r') as file:
-        words = file.read().split()
+        words = file.read().lower().split()
         return words
 
 class TextExtractionTests(unittest.TestCase):
@@ -14,12 +17,23 @@ def test_compare_text(self):
         """performs OCR on the test image and extracts the words into an extracted.txt file"""
 
         #Arrange
-        #generate a txt file called extracted.txt from test.png here
-        extracted_text = get_word_stream('extracted.txt')
-        expected_text = get_word_stream('expected.txt')
+        src = "src/text_extraction/test/test_files/test.png"
+        dst = "src/text_extraction/test/extracted.png"
+        text_extractor = TextExtractor()
+        text_extractor.out_dir = "src/text_extraction/test/"
+        #copies test file since text extractor deletes files once processed
+        shutil.copy(src, dst)
 
         #Act
+        text_extractor.read("src/text_extraction/test/extracted.png")
+        #convert expected and extracted texts into lists
+        extracted_text = get_word_stream("src/text_extraction/test/extracted.txt")
+        expected_text = get_word_stream("src/text_extraction/test/test_files/expected.txt")
+        #compares extracted text to the expected text to see if extraction was a success
         result = extracted_text == expected_text
+        #deletes the testing file that was copied over
+        if os.path.exists("src/text_extraction/test/extracted.png"):
+            os.remove("src/text_extraction/test/extracted.png")
 
         #Assert
-        self.assertTrue(result, 'the results is false thus the extracted is not correct')
+        self.assertTrue(result, 'the results is false thus the extracted text is not correct')
diff --git a/src/text_extraction/test/test_files/expected.txt b/src/text_extraction/test/test_files/expected.txt
@@ -0,0 +1 @@
+Test file
diff --git a/src/text_extraction/test/test_files/test.png b/src/text_extraction/test/test_files/test.png