diff --git a/README.md b/README.md index 61b0b1ff..54d63029 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ The `results.json` file will contain a json dictionary where the keys are the in - `text_lines` - the detected text and bounding boxes for each line - `text` - the text in the line - - `confidence` - the confidence of the model in the detected text + - `confidence` - the confidence of the model in the detected text (0-1) - `polygon` - the polygon for the text line in (x1, y1), (x2, y2), (x3, y3), (x4, y4) format. The points are in clockwise order from the top left. - `bbox` - the axis-aligned rectangle for the text line in (x1, y1, x2, y2) format. (x1, y1) is the top left corner, and (x2, y2) is the bottom right corner. - `languages` - the languages specified for the page @@ -130,7 +130,7 @@ The `results.json` file will contain a json dictionary where the keys are the in - `bboxes` - detected bounding boxes for text - `bbox` - the axis-aligned rectangle for the text line in (x1, y1, x2, y2) format. (x1, y1) is the top left corner, and (x2, y2) is the bottom right corner. - `polygon` - the polygon for the text line in (x1, y1), (x2, y2), (x3, y3), (x4, y4) format. The points are in clockwise order from the top left. - - `confidence` - the confidence of the model in the detected text + - `confidence` - the confidence of the model in the detected text (0-1) - `vertical_lines` - vertical lines detected in the document - `bbox` - the axis-aligned line coordinates. - `horizontal_lines` - horizontal lines detected in the document diff --git a/detect_layout.py b/detect_layout.py index 0394fe87..8eeab91e 100644 --- a/detect_layout.py +++ b/detect_layout.py @@ -21,6 +21,8 @@ def main(): parser.add_argument("--debug", action="store_true", help="Run in debug mode.", default=False) args = parser.parse_args() + print("Layout detection is currently in beta! There may be issues with the output.") + model = load_model(checkpoint=settings.LAYOUT_MODEL_CHECKPOINT) processor = load_processor(checkpoint=settings.LAYOUT_MODEL_CHECKPOINT) det_model = load_model() diff --git a/pyproject.toml b/pyproject.toml index f5ccaef0..8aff4834 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "surya-ocr" -version = "0.2.2" +version = "0.2.3" description = "OCR and line detection in 90+ languages" authors = ["Vik Paruchuri "] readme = "README.md" @@ -14,7 +14,8 @@ include = [ "detect_text.py", "ocr_text.py", "ocr_app.py", - "run_ocr_app.py" + "run_ocr_app.py", + "detect_layout.py" ] [tool.poetry.dependencies] @@ -45,6 +46,7 @@ playwright = "^1.41.2" [tool.poetry.scripts] surya_detect = "detect_text:main" surya_ocr = "ocr_text:main" +surya_layout = "detect_layout:main" surya_gui = "run_ocr_app:run_app" [build-system] diff --git a/surya/postprocessing/math/render.py b/surya/postprocessing/math/render.py index 0fa2acc0..761334a0 100644 --- a/surya/postprocessing/math/render.py +++ b/surya/postprocessing/math/render.py @@ -3,64 +3,6 @@ import io -def text_to_pil(text, target_width, target_height, fontsize=10): - html_template = """ - - - - - - -
{content}
- - - """ - - formatted_text = text.replace('\n', '\\n').replace('"', '\\"') - with sync_playwright() as p: - browser = p.chromium.launch() - page = browser.new_page() - page.set_viewport_size({'width': target_width, 'height': target_height}) - - while fontsize <= 30: - html_content = html_template.replace("{content}", formatted_text).replace("{fontsize}", str(fontsize)) - page.set_content(html_content) - - dimensions = page.evaluate("""() => { - const render = document.getElementById('content'); - return { - width: render.offsetWidth, - height: render.offsetHeight - }; - }""") - - if dimensions['width'] >= target_width or dimensions['height'] >= target_height: - fontsize -= 1 - break - else: - fontsize += 1 - - html_content = html_template.replace("{content}", formatted_text).replace("{fontsize}", str(fontsize)) - page.set_content(html_content) - - screenshot_bytes = page.screenshot() - browser.close() - - image_stream = io.BytesIO(screenshot_bytes) - pil_image = Image.open(image_stream) - pil_image.load() - return pil_image - - def latex_to_pil(latex_code, target_width, target_height, fontsize=18): html_template = """ diff --git a/surya/postprocessing/text.py b/surya/postprocessing/text.py index ef02c7af..10a00b37 100644 --- a/surya/postprocessing/text.py +++ b/surya/postprocessing/text.py @@ -6,8 +6,8 @@ from surya.schema import TextLine from surya.settings import settings -from surya.postprocessing.math.latex import is_latex, slice_latex -from surya.postprocessing.math.render import latex_to_pil, text_to_pil +from surya.postprocessing.math.latex import is_latex +from surya.postprocessing.math.render import latex_to_pil def sort_text_lines(lines: List[TextLine], tolerance=1.25):