Fix some typos (most of them found by codespell) #42

Open · stweil wants to merge 6 commits into develop
2 changes: 1 addition & 1 deletion README.md
@@ -54,7 +54,7 @@ python user_scripts/parse_folder.py -c PATH_TO_config_file_for_OCR_ENGINE.ini -i
```

## Available models
-General layout analysis (printed and handwritten) with european printed OCR specialized to czech newspapers can be [downloaded here](https://www.fit.vut.cz/~ihradis/pero/pero_eu_cz_print_newspapers_2020-10-09.tar.gz). The OCR engine is suitable for most european printed documents. It is specialized for low-quality czech newspapers digitized from microfilms, but it provides very good results for almast all types of printed documents in most languages. If you are interested in processing printed fraktur fonts, handwritten documents or medieval manuscripts, feel free to contact the authors. The newest OCR engines are available at [pero-ocr.fit.vutbr.cz](https://pero-ocr.fit.vutbr.cz). OCR engines are available also through API runing at [pero-ocr.fit.vutbr.cz/api](https://pero-ocr.fit.vutbr.cz/api), [github repository](https://github.com/DCGM/pero-ocr-api).
+General layout analysis (printed and handwritten) with European printed OCR specialized to czech newspapers can be [downloaded here](https://www.fit.vut.cz/~ihradis/pero/pero_eu_cz_print_newspapers_2020-10-09.tar.gz). The OCR engine is suitable for most European printed documents. It is specialized for low-quality czech newspapers digitized from microfilms, but it provides very good results for almost all types of printed documents in most languages. If you are interested in processing printed fraktur fonts, handwritten documents or medieval manuscripts, feel free to contact the authors. The newest OCR engines are available at [pero-ocr.fit.vutbr.cz](https://pero-ocr.fit.vutbr.cz). OCR engines are available also through API running at [pero-ocr.fit.vutbr.cz/api](https://pero-ocr.fit.vutbr.cz/api), [github repository](https://github.com/DCGM/pero-ocr-api).

## Using the python package
The package provides two main classes:
2 changes: 1 addition & 1 deletion pero_ocr/confidence_estimation.py
@@ -10,7 +10,7 @@ def get_letter_confidence(logits: np.ndarray, alignment: typing.List[int], blank

Args:
logits: numpy array of (unnormalized) log-probabilities of symbols, organized as (time, symbol).
-alignment: a list of symbols assigned to indivudual time frames
+alignment: a list of symbols assigned to individual time frames
blank_symbol: index of CTC blank in logits, also its representation in alignment

Returns:
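A minimal sketch of calling this function, assuming the argument names and shapes from the docstring above; the toy logits and alignment are made up, and the exact return format is not shown in this hunk:

```python
import numpy as np

from pero_ocr.confidence_estimation import get_letter_confidence

# Toy inputs shaped as the docstring describes: logits are (time, symbol)
# log-probabilities, alignment assigns one symbol index to each time frame,
# and blank_symbol is the index of the CTC blank.
logits = np.log(np.array([
    [0.70, 0.20, 0.10],   # frame 0: mostly symbol 0
    [0.10, 0.80, 0.10],   # frame 1: mostly symbol 1
    [0.10, 0.10, 0.80],   # frame 2: mostly the blank
]))
blank_symbol = 2
alignment = [0, 1, blank_symbol]   # one symbol per time frame

letter_confidences = get_letter_confidence(logits, alignment, blank_symbol)
print(letter_confidences)
```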
2 changes: 1 addition & 1 deletion pero_ocr/force_alignment.py
@@ -22,7 +22,7 @@ def force_align(neg_logprobs: np.ndarray, symbols_seq: typing.List[int], blank_s
A list of symbols corresponding to the most probable path, including CTC blanks.

Raises:
-ValueError: On various occassions :-)
+ValueError: On various occasions :-)
"""
complete_seq, char_sequence = complete_state_seq(symbols_seq, blank_symbol)
A = hmm_trans_from_string(symbols_seq)
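A small usage sketch, assuming the parameter order shown in the signature above (neg_logprobs, symbols_seq, blank symbol); the values are invented:

```python
import numpy as np

from pero_ocr.force_alignment import force_align

# Per-frame probabilities, organized as (time, symbol); the last column is
# treated as the CTC blank in this toy example.
frame_probs = np.array([
    [0.80, 0.10, 0.10],   # frame 0: symbol 0
    [0.10, 0.10, 0.80],   # frame 1: blank
    [0.10, 0.80, 0.10],   # frame 2: symbol 1
])
neg_logprobs = -np.log(frame_probs)
blank_symbol = 2
symbols_seq = [0, 1]       # the transcription to align, as symbol indices

path = force_align(neg_logprobs, symbols_seq, blank_symbol)
print(path)   # most probable frame-level path, CTC blanks included
```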
2 changes: 1 addition & 1 deletion pero_ocr/layout_engines/cnn_layout_engine.py
@@ -137,7 +137,7 @@ def parse(self, out_map, downsample):
print('MAP RES:', out_map.shape)
out_map[:, :, 4][out_map[:, :, 4] < 0] = 0

-# expand line heights verticaly
+# expand line heights vertically
heights_map = ndimage.morphology.grey_dilation(
out_map[:, :, :2], size=(5, 1, 1))

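The "expand line heights vertically" step relies on greyscale dilation. A toy illustration of what grey_dilation with size=(5, 1, 1) does on a synthetic map (this is not the engine's actual data, only the operation itself):

```python
import numpy as np
from scipy import ndimage

# A synthetic (height, width, channels) map with a single-row response.
out_map = np.zeros((9, 4, 2), dtype=np.float32)
out_map[4, :, 0] = 1.0

# size=(5, 1, 1) takes a running maximum over a 5-pixel vertical window,
# independently for every column and channel, so the response grows upward
# and downward by two pixels.
expanded = ndimage.grey_dilation(out_map, size=(5, 1, 1))
print(expanded[:, 0, 0])   # rows 2..6 are now 1.0
```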
2 changes: 1 addition & 1 deletion pero_ocr/ocr_engine/line_ocr_engine.py
@@ -60,7 +60,7 @@ def process_lines(self, lines, sparse_logits=True, tight_crop_logits=False, no_l
if line.shape[0] == self.line_px_height:
ValueError("Line height needs to be {} for this ocr network and is {} instead.".format(self.line_px_height, line.shape[0]))
if line.shape[2] == 3:
ValueError("Line crops need three color channes, but this one has {}.".format(line.shape[2]))
ValueError("Line crops need three color channels, but this one has {}.".format(line.shape[2]))

all_transcriptions = [None]*len(lines)
all_logits = [None]*len(lines)
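The checks above expect line crops of shape (line_px_height, width, 3). A sketch of preparing such a crop before calling process_lines; the helper and its names are illustrative, not part of the repository:

```python
import cv2
import numpy as np

def prepare_line_crop(crop: np.ndarray, line_px_height: int) -> np.ndarray:
    """Resize a line crop to the fixed height the OCR network expects,
    keeping the aspect ratio and ensuring three color channels."""
    if crop.ndim == 2:                      # grayscale -> 3 channels
        crop = cv2.cvtColor(crop, cv2.COLOR_GRAY2BGR)
    scale = line_px_height / crop.shape[0]
    new_width = max(1, int(round(crop.shape[1] * scale)))
    return cv2.resize(crop, (new_width, line_px_height),
                      interpolation=cv2.INTER_LINEAR)

# Example: a random 52-pixel-tall crop resized to a hypothetical 40-pixel engine.
crop = (np.random.rand(52, 300) * 255).astype(np.uint8)
print(prepare_line_crop(crop, 40).shape)   # (40, 231, 3)
```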
2 changes: 1 addition & 1 deletion user_scripts/merge_ocr_results.py
@@ -14,7 +14,7 @@

def parse_arguments():
parser = argparse.ArgumentParser(
-description='Merge results of multiple OCR engines together by picking the most cinfident transcription '
+description='Merge results of multiple OCR engines together by picking the most confident transcription '
'for each text line. The tool takes multiple directories, where each should contain Page XML '
'files and corresponding logit files. The file names in each directory must be the same.'
'Text lines and their IDs must be the same in each directory.')
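Reduced to a sketch, the merging rule described here is "keep the highest-confidence transcription per line ID across engines" (an illustration only, with made-up data structures, not the script's actual implementation):

```python
from typing import Dict, List, Tuple

def merge_by_confidence(results_per_engine: List[Dict[str, Tuple[str, float]]]) -> Dict[str, str]:
    """Each engine contributes {line_id: (transcription, confidence)};
    keep the most confident transcription for every line ID."""
    best: Dict[str, Tuple[str, float]] = {}
    for engine_results in results_per_engine:
        for line_id, (text, confidence) in engine_results.items():
            if line_id not in best or confidence > best[line_id][1]:
                best[line_id] = (text, confidence)
    return {line_id: text for line_id, (text, _) in best.items()}

engine_a = {"line_1": ("Helo world", 0.72), "line_2": ("Prague", 0.95)}
engine_b = {"line_1": ("Hello world", 0.91), "line_2": ("Praque", 0.60)}
print(merge_by_confidence([engine_a, engine_b]))
# {'line_1': 'Hello world', 'line_2': 'Prague'}
```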
4 changes: 2 additions & 2 deletions user_scripts/parse_folder.py
@@ -37,12 +37,12 @@ def parse_arguments():
parser.add_argument('--output-logit-path', help='')
parser.add_argument('--output-alto-path', help='')
parser.add_argument('--output-transcriptions-file-path', help='')
-parser.add_argument('--skipp-missing-xml', action='store_true', help='Skipp images which have missing xml.')
+parser.add_argument('--skipp-missing-xml', action='store_true', help='Skip images which have missing xml.')

@stweil (author) · Jun 13, 2022:
I suggest replacing the argument itself by --skip-missing-xml, too, but that might break existing documentation or applications and would also need a fix for line 301. There is already an argument --skip-processed, so fixing the one here would also make the arguments more consistent.

@stweil (author) suggested a change:
-parser.add_argument('--skipp-missing-xml', action='store_true', help='Skip images which have missing xml.')
+parser.add_argument('--skip-missing-xml', action='store_true', help='Skip images which have missing xml.')

@stweil (author):
Should I add this change to the pull request?


parser.add_argument('--device', choices=["gpu", "cpu"], default="gpu")
parser.add_argument('--gpu-id', type=int, default=None, help='If set, the computation runs of the specified GPU, otherwise safe-gpu is used to allocate first unused GPU.')

-parser.add_argument('--process-count', type=int, default=1, help='Number of parallel processes (this works mostly only for line cropping and it probably fails and crashes for most other uses cases).')
+parser.add_argument('--process-count', type=int, default=1, help='Number of parallel processes (this works mostly only for line cropping and it probably fails and crashes for most other use cases).')
args = parser.parse_args()
return args

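One way the rename discussed in the review above could be made without breaking existing callers (a sketch only, not part of this pull request): argparse accepts several option strings for a single argument, so the misspelled flag could remain as a deprecated alias.

```python
import argparse

parser = argparse.ArgumentParser()
# Both spellings set the same attribute, so scripts that already pass
# --skipp-missing-xml keep working while documentation can switch to the
# corrected name. (Illustrative only; parse_folder.py does not do this.)
parser.add_argument('--skip-missing-xml', '--skipp-missing-xml',
                    dest='skip_missing_xml', action='store_true',
                    help='Skip images which have missing xml.')

args = parser.parse_args(['--skipp-missing-xml'])
print(args.skip_missing_xml)   # True
```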