Merge branch 'develop' into music

DCGM · Sep 24, 2024 · d9c64cd · d9c64cd
2 parents 7c4251e + 3311055
commit d9c64cd
Showing 1 changed file with 15 additions and 2 deletions.
diff --git a/pero_ocr/core/layout.py b/pero_ocr/core/layout.py
@@ -229,6 +229,9 @@ def to_altoxml_text(self, text_line, arabic_helper,
         if arabic_helper.is_arabic_line(self.transcription):
             arabic_line = True
 
+        logits = None
+        logprobs = None
+        aligned_letters = None
         try:
             label = self.get_labels()
             blank_idx = self.logits.shape[1] - 1
@@ -238,7 +241,17 @@ def to_altoxml_text(self, text_line, arabic_helper,
             aligned_letters = align_text(-logprobs, np.array(label), blank_idx)
         except (ValueError, IndexError, TypeError) as e:
             logger.warning(f'Error: Alto export, unable to align line {self.id} due to exception: {e}.')
-            self.transcription_confidence = 0.0
+
+            if logits is not None:
+                max_val = np.max(logits, axis=1)
+                logits = logits - max_val[:, np.newaxis]
+                probs = np.exp(logits)
+                probs = probs / np.sum(probs, axis=1, keepdims=True)
+                probs = np.max(probs, axis=1)
+                self.transcription_confidence = np.quantile(probs, .50)
+            else:
+                self.transcription_confidence = 0.0
+
             average_word_width = (text_line_hpos + text_line_width) / len(self.transcription.split())
             for w, word in enumerate(self.transcription.split()):
                 string = ET.SubElement(text_line, "String")
@@ -1134,4 +1147,4 @@ def create_ocr_processing_element(id: str = "IdOcr",
 
 def normalize_text(text: str) -> str:
     """Normalize text to ASCII characters. (e.g. Obrázek -> Obrazek)"""
-    return unicodedata.normalize('NFD', text).encode('ascii', 'ignore').decode('ascii')
+    return unicodedata.normalize('NFD', text).encode('ascii', 'ignore').decode('ascii')