Skip to content

Commit

Permalink
Update TextLine.transcription_confidence when logits are available
Browse files Browse the repository at this point in the history
  • Loading branch information
ibenes committed Apr 28, 2022
1 parent d59c186 commit ecbbd7a
Showing 1 changed file with 11 additions and 4 deletions.
15 changes: 11 additions & 4 deletions pero_ocr/document_ocr/page_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,22 +461,26 @@ def __init__(self, config, config_path=''):
self.decoder = page_decoder_factory(config, config_path=config_path)

@staticmethod
def compute_line_confidence(line, threshold):
def compute_line_confidence(line, threshold=None):
logits = line.get_dense_logits()
log_probs = logits - np.logaddexp.reduce(logits, axis=1)[:, np.newaxis]
best_ids = np.argmax(log_probs, axis=-1)
best_probs = np.exp(np.max(log_probs, axis=-1))
worst_best_prob = get_prob(best_ids, best_probs)
print(worst_best_prob, np.sum(np.exp(best_probs) < threshold), best_probs.shape, np.nonzero(np.exp(best_probs) < threshold))
# print(worst_best_prob, np.sum(np.exp(best_probs) < threshold), best_probs.shape, np.nonzero(np.exp(best_probs) < threshold))
# for i in np.nonzero(np.exp(best_probs) < threshold)[0]:
# print(best_probs[i-1:i+2], best_ids[i-1:i+2])

return worst_best_prob

def update_confidences(self, page_layout):
for line in page_layout.lines_iterator():
if line.logits is not None:
line.transcription_confidence = self.compute_line_confidence(line)

def filter_confident_lines(self, page_layout):
for region in page_layout.regions:
region.lines = [line for line in region.lines
if PageParser.compute_line_confidence(line, self.filter_confident_lines_threshold) > self.filter_confident_lines_threshold]
region.lines = [line for line in region.lines if line.transcription_confidence > self.filter_confident_lines_threshold]
return page_layout

def process_page(self, image, page_layout):
Expand All @@ -489,6 +493,9 @@ def process_page(self, image, page_layout):
page_layout = self.ocr.process_page(image, page_layout)
if self.run_decoder:
page_layout = self.decoder.process_page(page_layout)

self.update_confidences(page_layout)

if self.filter_confident_lines_threshold > 0:
page_layout = self.filter_confident_lines(page_layout)

Expand Down

2 comments on commit ecbbd7a

@michal-hradis
Copy link
Contributor

@michal-hradis michal-hradis commented on ecbbd7a Jun 21, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ibenes
Karel, this is breaking our API, where we use a confidence threshold of 0.66. Results are missing text lines, and page confidence is much lower. The second is not a problem; the first is. The API should behave as before. Should we change the threshold, the API code, or this code?

run_client.py from line 156:
```python
alto_xml = page_layout.to_altoxml_string(ocr_processing=ocr_processing,
                                         min_line_confidence=args.min_confidence)

if args.min_confidence > 0:
    for region in page_layout.regions:
        region.lines = \
            [l for l in region.lines if
             l.transcription_confidence and l.transcription_confidence > args.min_confidence]
```

@ibenes
Copy link
Contributor Author

@ibenes ibenes commented on ecbbd7a Aug 1, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From my perspective, the issue runs deep. I see its core in the fact that Layout.to_altoxml_string() does more than just produce the string: it computes confidences of its own, stores them in the respective TextLines, filters those lines, and, finally, other code relies on these side effects.

I think we should sit down and decide which confidence measure we want -- and the computation should not be a by-product of producing some string from Layout. Once the two are separated, to_altoxml_string() could even get a bit of cleanup. If we want both the ALTO confidence and the "worst of best" confidence, they cannot live in the same member variable.

Please sign in to comment.