Skip to content

Commit

Permalink
[app][rfct] alter behavior for empty lines
Browse files (browse the repository at this point in the history)
  • Loading branch information
M3ssman committed May 31, 2022
1 parent 0bf884a commit 1b06233
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 7 deletions.
5 changes: 3 additions & 2 deletions lib/ocr_step.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -480,8 +480,9 @@ def _sanitize_wraps(lines):
for i, line in enumerate(lines):
if i < len(lines) - 1 and line.endswith("-"):
next_line = lines[i + 1]
if not next_line.strip():
raise RuntimeError(f"cant sanitize '{lines[i]} with empty next_line")
if len(next_line.strip()) == 0:
# encountered empty next line, no merge possible
continue
next_line_tokens = next_line.split()
nextline_first_token = next_line_tokens.pop(0)
# join the rest of valid next line
Expand Down
9 changes: 4 additions & 5 deletions ocr_pipeline.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,7 +12,6 @@
import sys
import tempfile
import time
import traceback

# pylint: disable=unused-import
# import statement *is_REALLY* necessary
Expand Down Expand Up @@ -426,8 +425,8 @@ def _execute_pipeline(*args):
start_path,
step,
exc.args[0])
# OSError means something really severe, like
# non-existing resources/connections that will harm
# OSError means something really severe, like
# non-existing resources/connections that will harm
# all images in pipeline, therefore signal halt
except OSError as os_exc:
pipeline.logger.critical(
Expand Down Expand Up @@ -486,7 +485,7 @@ def _execute_pipeline(*args):
ARGS = vars(APP_ARGUMENTS.parse_args())

DATA_PATH = ARGS["data_path"]
if not "," in DATA_PATH and not os.path.isdir(DATA_PATH):
if "," not in DATA_PATH and not os.path.isdir(DATA_PATH):
print(
f"[ERROR] data_path path '{DATA_PATH}' invalid!", file=sys.stderr)
sys.exit(1)
Expand Down Expand Up @@ -514,7 +513,7 @@ def _execute_pipeline(*args):
with concurrent.futures.ProcessPoolExecutor(max_workers=EXECUTORS) as executor:
RESULTS = list(executor.map(_execute_pipeline, INPUT_NUMBERED))
pipeline.logger.info("having %d workflow results", len(RESULTS))
estimations = [r for r in RESULTS if r[1] > MARK_MISSING_ESTM]
estimations = [r for r in RESULTS if r is not None and r[1] > MARK_MISSING_ESTM]
if estimations:
pipeline.store_estimations(estimations)
else:
Expand Down

0 comments on commit 1b06233

Please sign in to comment.