From 331105542e3db2397c447063728f1dc6d02763ac Mon Sep 17 00:00:00 2001 From: Michal Hradis Date: Sun, 22 Sep 2024 19:57:19 +0200 Subject: [PATCH] ALTO now computes reasonable confidences even for lines which fail to align (e.g. from transformers). --- pero_ocr/core/layout.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/pero_ocr/core/layout.py b/pero_ocr/core/layout.py index bc149d7..7c91594 100644 --- a/pero_ocr/core/layout.py +++ b/pero_ocr/core/layout.py @@ -470,6 +470,9 @@ def to_altoxml_string(self, ocr_processing_element: ET.SubElement = None, page_u text_line.set("HEIGHT", str(int(text_line_height))) text_line.set("WIDTH", str(int(text_line_width))) + logits = None + logprobs = None + aligned_letters = None try: chars = [i for i in range(len(line.characters))] char_to_num = dict(zip(line.characters, chars)) @@ -491,7 +494,16 @@ def to_altoxml_string(self, ocr_processing_element: ET.SubElement = None, page_u aligned_letters = align_text(-logprobs, np.array(label), blank_idx) except (ValueError, IndexError, TypeError) as e: logger.warning(f'Error: Alto export, unable to align line {line.id} due to exception {e}.') - line.transcription_confidence = 0 + + if logits is not None: + max_val = np.max(logits, axis=1) + logits = logits - max_val[:, np.newaxis] + probs = np.exp(logits) + probs = probs / np.sum(probs, axis=1, keepdims=True) + probs = np.max(probs, axis=1) + line.transcription_confidence = np.quantile(probs, .50) + else: + line.transcription_confidence = 0 average_word_width = (text_line_hpos + text_line_width) / len(line.transcription.split()) for w, word in enumerate(line.transcription.split()): string = ET.SubElement(text_line, "String")