diff --git a/src/spell_checking/test/Test_File.jpg b/src/spell_checking/test/test_files/Test_File.jpg similarity index 100% rename from src/spell_checking/test/Test_File.jpg rename to src/spell_checking/test/test_files/Test_File.jpg diff --git a/src/spell_checking/test/expected.txt b/src/spell_checking/test/test_files/expected.txt similarity index 100% rename from src/spell_checking/test/expected.txt rename to src/spell_checking/test/test_files/expected.txt diff --git a/src/spell_checking/test/test_integration_spellchecker.py b/src/spell_checking/test/test_integration_spellchecker.py index b52d0a9..fc3ed4a 100644 --- a/src/spell_checking/test/test_integration_spellchecker.py +++ b/src/spell_checking/test/test_integration_spellchecker.py @@ -1,6 +1,7 @@ """Module providing functionaly needed to run integration test""" import unittest import os +import shutil from spell_checking.spell_checker import SpellChecker from text_extraction.text_extractor import TextExtractor @@ -10,15 +11,22 @@ class SpellcheckerIntegrationTests(unittest.TestCase): def test_integration_spellchecker(self): """Method testing if the input of the textextractor cen be received by the spellchecker""" #Arrange + src = "src/spell_checking/test/test_files/Test_File.jpg" + dst = "src/spell_checking/test/Test_File.jpg" + ground_truth = "src/spell_checking/test/test_files/expected.txt" text_extractor = TextExtractor() text_extractor.out_dir = "/watched/spell_checking/" spellchecker = SpellChecker("src/spell_checking/wordList.txt") spellchecker.out_dir = "/watched/output" - with open("src/spell_checking/test/expected.txt", 'r', encoding="utf-8") as expected_text: + #register expected.txt as a list + with open(ground_truth, 'r', encoding="utf-8") as expected_text: expected_text = expected_text.read().lower().split() print(f'Expected text: {expected_text}') + #copies test file since text extractor deletes files once processed + shutil.copy(src, dst) #Act + #if a txt file has already been processed do not run extraction again if not os.path.exists("/watched/output/Test_File.txt"): text_extractor.read("src/spell_checking/test/Test_File.jpg") spellchecker.handle_files("/watched/spell_checking/Test_File.txt") @@ -26,6 +34,9 @@ def test_integration_spellchecker(self): output = output.read().lower().split() print(f'Spellchecked text: {output}') status = bool(output == expected_text) + #deletes the testing file that was copied over + if os.path.exists("src/spell_checking/test/Test_File.jpg"): + os.remove("src/spell_checking/test/Test_File.jpg") #Assert self.assertTrue(status, "The text was not extracted correctly") diff --git a/src/text_extraction/test/__init__.py b/src/text_extraction/test/__init__.py new file mode 100644 index 0000000..939b27e --- /dev/null +++ b/src/text_extraction/test/__init__.py @@ -0,0 +1 @@ +# pylint: skip-file \ No newline at end of file diff --git a/src/text_extraction/test/expected.txt b/src/text_extraction/test/expected.txt deleted file mode 100644 index 707ef5c..0000000 --- a/src/text_extraction/test/expected.txt +++ /dev/null @@ -1,12 +0,0 @@ -Test of Text Extraction - -KNOX Group 20 - -October 2023 - -1 Introduction - -Hello World This is just some random text to determine if the text extractor -generally has the ability to extract text from the PDF accurately. - -Here is a separated bit of text. \ No newline at end of file diff --git a/src/text_extraction/test/extracted.txt b/src/text_extraction/test/extracted.txt index 1d9c7ff..4a6bd39 100644 --- a/src/text_extraction/test/extracted.txt +++ b/src/text_extraction/test/extracted.txt @@ -1,11 +1,2 @@ -Test of Text Extraction - -KNOX Group 20 -October 2023 - -1 Introduction - -Hello World This is just some random text to determine if the text extractor -generally has the ability to extract text from the PDF accurately. - -Here is a separated bit of text. +Test file + \ No newline at end of file diff --git a/src/text_extraction/test/test.png b/src/text_extraction/test/test.png deleted file mode 100644 index 9b3fc2b..0000000 Binary files a/src/text_extraction/test/test.png and /dev/null differ diff --git a/src/text_extraction/test/test_check_text.py b/src/text_extraction/test/test_check_text.py index d2b9895..f57fb27 100644 --- a/src/text_extraction/test/test_check_text.py +++ b/src/text_extraction/test/test_check_text.py @@ -1,10 +1,13 @@ """provides unit test functionality""" import unittest +import shutil +import os +from text_extraction.text_extractor import TextExtractor def get_word_stream(path): """splits string read into individual words to compare the extracted words""" with open(path, encoding='utf8', mode='r') as file: - words = file.read().split() + words = file.read().lower().split() return words class TextExtractionTests(unittest.TestCase): @@ -14,12 +17,23 @@ def test_compare_text(self): """perform ocr on the test image and extracts the words into an extracted.txt file""" #Arrange - #generate a txt file called extracted.txt from test.png here - extracted_text = get_word_stream('extracted.txt') - expected_text = get_word_stream('expected.txt') + src = "src/text_extraction/test/test_files/test.png" + dst = "src/text_extraction/test/extracted.png" + text_extractor = TextExtractor() + text_extractor.out_dir = "src/text_extraction/test/" + #copies test file since text extractor deletes files once processed + shutil.copy(src, dst) #Act + text_extractor.read("src/text_extraction/test/extracted.png") + #convert expected and extracted texts into lists + extracted_text = get_word_stream("src/text_extraction/test/extracted.txt") + expected_text = get_word_stream("src/text_extraction/test/test_files/expected.txt") + #compares extracted text to the expected text to see if extraction was a success result = extracted_text == expected_text + #deletes the testing file that was copied over + if os.path.exists("src/text_extraction/test/extracted.png"): + os.remove("src/text_extraction/test/extracted.png") #Assert - self.assertTrue(result, 'the results is false thus the extracted is not correct') + self.assertTrue(result, 'the results is false thus the extracted text is not correct') diff --git a/src/text_extraction/test/test_files/expected.txt b/src/text_extraction/test/test_files/expected.txt new file mode 100644 index 0000000..84362ca --- /dev/null +++ b/src/text_extraction/test/test_files/expected.txt @@ -0,0 +1 @@ +Test file \ No newline at end of file diff --git a/src/text_extraction/test/test_files/test.png b/src/text_extraction/test/test_files/test.png new file mode 100644 index 0000000..7fee212 Binary files /dev/null and b/src/text_extraction/test/test_files/test.png differ