Cleanup for merge

VikParuchuri · Mar 6, 2024 · b56ba61
1 parent 773cf24
commit b56ba61
Show file tree

Hide file tree

Showing 5 changed files with 10 additions and 64 deletions.
diff --git a/README.md b/README.md
@@ -83,7 +83,7 @@ The `results.json` file will contain a json dictionary where the keys are the in
 
 - `text_lines` - the detected text and bounding boxes for each line
   - `text` - the text in the line
-  - `confidence` - the confidence of the model in the detected text
+  - `confidence` - the confidence of the model in the detected text (0-1)
   - `polygon` - the polygon for the text line in (x1, y1), (x2, y2), (x3, y3), (x4, y4) format.  The points are in clockwise order from the top left.
   - `bbox` - the axis-aligned rectangle for the text line in (x1, y1, x2, y2) format.  (x1, y1) is the top left corner, and (x2, y2) is the bottom right corner.
 - `languages` - the languages specified for the page
@@ -130,7 +130,7 @@ The `results.json` file will contain a json dictionary where the keys are the in
 - `bboxes` - detected bounding boxes for text
   - `bbox` - the axis-aligned rectangle for the text line in (x1, y1, x2, y2) format.  (x1, y1) is the top left corner, and (x2, y2) is the bottom right corner.
   - `polygon` - the polygon for the text line in (x1, y1), (x2, y2), (x3, y3), (x4, y4) format.  The points are in clockwise order from the top left.
-  - `confidence` - the confidence of the model in the detected text
+  - `confidence` - the confidence of the model in the detected text (0-1)
 - `vertical_lines` - vertical lines detected in the document
   - `bbox` - the axis-aligned line coordinates.
 - `horizontal_lines` - horizontal lines detected in the document

diff --git a/detect_layout.py b/detect_layout.py
@@ -21,6 +21,8 @@ def main():
     parser.add_argument("--debug", action="store_true", help="Run in debug mode.", default=False)
     args = parser.parse_args()
 
+    print("Layout detection is currently in beta!  There may be issues with the output.")
+
     model = load_model(checkpoint=settings.LAYOUT_MODEL_CHECKPOINT)
     processor = load_processor(checkpoint=settings.LAYOUT_MODEL_CHECKPOINT)
     det_model = load_model()

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "surya-ocr"
-version = "0.2.2"
+version = "0.2.3"
 description = "OCR and line detection in 90+ languages"
 authors = ["Vik Paruchuri <[email protected]>"]
 readme = "README.md"
@@ -14,7 +14,8 @@ include = [
     "detect_text.py",
     "ocr_text.py",
     "ocr_app.py",
-    "run_ocr_app.py"
+    "run_ocr_app.py",
+    "detect_layout.py"
 ]
 
 [tool.poetry.dependencies]
@@ -45,6 +46,7 @@ playwright = "^1.41.2"
 [tool.poetry.scripts]
 surya_detect = "detect_text:main"
 surya_ocr = "ocr_text:main"
+surya_layout = "detect_layout:main"
 surya_gui = "run_ocr_app:run_app"
 
 [build-system]

diff --git a/surya/postprocessing/math/render.py b/surya/postprocessing/math/render.py
@@ -3,64 +3,6 @@
 import io
 
 
-def text_to_pil(text, target_width, target_height, fontsize=10):
-    html_template = """
-    <!DOCTYPE html>
-    <html>
-    <head>
-        <style>
-            body {
-                margin: 0;
-                padding: 0;
-                display: flex;
-            }
-            #content {
-                font-size: {fontsize}px;
-            }
-        </style>
-    </head>
-    <body>
-        <div id="content">{content}</div>
-    </body>
-    </html>
-    """
-
-    formatted_text = text.replace('\n', '\\n').replace('"', '\\"')
-    with sync_playwright() as p:
-        browser = p.chromium.launch()
-        page = browser.new_page()
-        page.set_viewport_size({'width': target_width, 'height': target_height})
-
-        while fontsize <= 30:
-            html_content = html_template.replace("{content}", formatted_text).replace("{fontsize}", str(fontsize))
-            page.set_content(html_content)
-
-            dimensions = page.evaluate("""() => {
-                const render = document.getElementById('content');
-                return {
-                    width: render.offsetWidth,
-                    height: render.offsetHeight
-                };
-            }""")
-
-            if dimensions['width'] >= target_width or dimensions['height'] >= target_height:
-                fontsize -= 1
-                break
-            else:
-                fontsize += 1
-
-        html_content = html_template.replace("{content}", formatted_text).replace("{fontsize}", str(fontsize))
-        page.set_content(html_content)
-
-        screenshot_bytes = page.screenshot()
-        browser.close()
-
-        image_stream = io.BytesIO(screenshot_bytes)
-        pil_image = Image.open(image_stream)
-        pil_image.load()
-        return pil_image
-
-
 def latex_to_pil(latex_code, target_width, target_height, fontsize=18):
     html_template = """
     <!DOCTYPE html>

diff --git a/surya/postprocessing/text.py b/surya/postprocessing/text.py
@@ -6,8 +6,8 @@
 
 from surya.schema import TextLine
 from surya.settings import settings
-from surya.postprocessing.math.latex import is_latex, slice_latex
-from surya.postprocessing.math.render import latex_to_pil, text_to_pil
+from surya.postprocessing.math.latex import is_latex
+from surya.postprocessing.math.render import latex_to_pil
 
 
 def sort_text_lines(lines: List[TextLine], tolerance=1.25):