From 0d964e47d5a865ef6d247ed12695cc127071f4e9 Mon Sep 17 00:00:00 2001 From: Oksana Belyaeva Date: Tue, 3 Dec 2024 16:07:58 +0300 Subject: [PATCH 1/4] TLDR-872 rewrite benchmark correctness (#510) --- .../benchmarks/benchmarks_tl_correctness.json | 21 --- .../benchmarks/benchmarks_tl_correctness.txt | 27 ++++ scripts/benchmark_tl_correctness.py | 123 ++++++++++++------ 3 files changed, 108 insertions(+), 63 deletions(-) delete mode 100644 resources/benchmarks/benchmarks_tl_correctness.json create mode 100644 resources/benchmarks/benchmarks_tl_correctness.txt diff --git a/resources/benchmarks/benchmarks_tl_correctness.json b/resources/benchmarks/benchmarks_tl_correctness.json deleted file mode 100644 index f3fee769..00000000 --- a/resources/benchmarks/benchmarks_tl_correctness.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "version": "0.11.2", - "guessing_the_correctness_of_the_text": { - "percentage_of_guessed_correct_tl": 0.9785407725321889, - "list_of_file_with_incorrect_tl": [ - "hogans-federal-motion-for-a-preliminary-injunction_1616093696_24.pdf", - "afcea-spy.pdf", - "b96a__usmc-combat-camera-directory.pdf", - "access-the-vision-for-2013.pdf", - "demystifying-nge-rock-ridge_1643518222_537.pdf" - ], - "percentage_of_guessed_incorrect_tl": 0.7916666666666666, - "list_of_file_with_correct_tl": [ - "PE20_1616439522_1.pdf", - "slides.pdf", - "PE157_1616278053_181.pdf", - "EXTERNAL FORMS - SUPPORTING DOCUMENTATION-ESHS9615401 2017_07_27 11_22_39_1616049888_455.pdf", - "╨º╨£╨£╨ñ_╨É╨▒╨░╨║╤â╨╝╨╛╨▓_╤â╤ç╨╡╨▒╨╜╨╕╨║.pdf" - ] - } -} \ No newline at end of file diff --git a/resources/benchmarks/benchmarks_tl_correctness.txt b/resources/benchmarks/benchmarks_tl_correctness.txt new file mode 100644 index 00000000..351e9fd8 --- /dev/null +++ b/resources/benchmarks/benchmarks_tl_correctness.txt @@ -0,0 +1,27 @@ +Version = + +--- Balanced Accuracy --- = 0.843482905982906 +--- Accuracy --- = 0.9534883720930233 +--- Weighted --- Precision = 0.9519564983695847, Recall=0.9534883720930233, F1=0.9525762106576597 +--- Class corrected --- : Precision = 0.9703389830508474, Recall=0.9786324786324786, F1=0.9744680851063829 +--- Class incorrected --- : Precision = 0.7727272727272727, Recall=0.7083333333333334, F1=0.7391304347826088 +--- AVG Time corrected pdfs --- = 3.2058254999992175 +--- AVG Time incorrected pdfs --- = 4.9308231472969055 +--- AVG Time all pdfs --- = 3.3662903974222584 + + +--- Failed corrected pdfs --- : +hogans-federal-motion-for-a-preliminary-injunction_1616093696_24.pdf +demystifying-nge-rock-ridge_1643518222_537.pdf +b96a__usmc-combat-camera-directory.pdf +afcea-spy.pdf +access-the-vision-for-2013.pdf + +--- Failed incorrected pdfs --- : +Gromov_Dubova_-_Primenenie_metodov_TFKP_k_vychisleniyu_opredelennykh_integralov.pdf +PE157_1616278053_181.pdf +╨º╨£╨£╨ñ_╨É╨▒╨░╨║╤â╨╝╨╛╨▓_╤â╤ç╨╡╨▒╨╜╨╕╨║.pdf +EXTERNAL FORMS - SUPPORTING DOCUMENTATION-ESHS9615401 2017_07_27 11_22_39_1616049888_455.pdf +slides.pdf +PE20_1616439522_1.pdf +Catalog-2020_dealers mail (1).pdf \ No newline at end of file diff --git a/scripts/benchmark_tl_correctness.py b/scripts/benchmark_tl_correctness.py index 2538cdef..5469f309 100644 --- a/scripts/benchmark_tl_correctness.py +++ b/scripts/benchmark_tl_correctness.py @@ -1,10 +1,11 @@ -import json import os import zipfile -from collections import OrderedDict, namedtuple +from time import time +import numpy as np import requests import wget +from sklearn.metrics import accuracy_score, balanced_accuracy_score, precision_recall_fscore_support from tqdm import tqdm from dedoc.config import 
get_config @@ -12,7 +13,7 @@ path_result = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "resources", "benchmarks")) os.makedirs(path_result, exist_ok=True) -path_result = os.path.join(path_result, "benchmarks_tl_correctness.json") +path_result = os.path.join(path_result, "benchmarks_tl_correctness.txt") """ Experiments are available -> https://github.com/alexander1999-hub/txt_layer_correctness/tree/main : @@ -24,37 +25,10 @@ """ host = "http://localhost:1231" -param_dist_errors = namedtuple("Param", ("total_file_size", "total_incorrect_files", "failed")) -def errors_param_for_text_layer(path_base: str, tl_type: str, tl_path: str, parameters: dict) -> namedtuple: - failed = [] - total_incorrect_files = 0 - directory = os.path.join(path_base, tl_path) - files_list = [file_name for file_name in os.listdir(directory) if file_name.endswith(".pdf")] - total_file_size = len(files_list) - print(f"Files: {files_list}\nFiles number: {total_file_size}") - for file in tqdm(files_list): - file_path = os.path.join(directory, file) - r = send_file(host=host, file_name=file, file_path=file_path, parameters=parameters) - - found = False - for warning in r["warnings"]: - if warning.find(tl_type) != -1: - found = True - break - - if found: - total_incorrect_files += 1 - failed.append(file) - return param_dist_errors(total_file_size, total_incorrect_files, failed) - - -if __name__ == "__main__": - data_dir = os.path.join(get_config()["intermediate_data_path"], "text_layer_correctness_data") - os.makedirs(data_dir, exist_ok=True) +def download_dataset(data_dir: str) -> str: benchmark_data_dir = os.path.join(data_dir, "data_with_text_layer") - if not os.path.isdir(benchmark_data_dir): path_out = os.path.join(data_dir, "data_with_text_layer.zip") wget.download("https://at.ispras.ru/owncloud/index.php/s/axacSYXf7YCLcbb/download", path_out) @@ -67,20 +41,85 @@ def errors_param_for_text_layer(path_base: str, tl_type: str, tl_path: str, para assert os.path.isdir(benchmark_data_dir) - result = OrderedDict() - result["version"] = requests.get(f"{host}/version").text + return benchmark_data_dir + + +def get_metrics(max_eval_pdf: int = 10000) -> None: + data_dir = os.path.join(get_config()["intermediate_data_path"], "text_layer_correctness_data") + os.makedirs(data_dir, exist_ok=True) + + data_dir = download_dataset(data_dir) + + folder = os.path.join(data_dir, "data_correct_text_layer") + correct_files = np.array([os.path.join(folder, file_name) for file_name in os.listdir(folder) if file_name.endswith(".pdf")]) + folder = os.path.join(data_dir, "data_incorrect_text_layer") + incorrect_files = np.array([os.path.join(folder, file_name) for file_name in os.listdir(folder) if file_name.endswith(".pdf")]) + + files = np.append(correct_files, incorrect_files) + + labels = np.empty(files.size) + labels[:correct_files.size] = 0 # "correct" + labels[correct_files.size:] = 1 # "incorrect" + + failed_corrected_pdfs = [] + failed_incorrected_pdfs = [] + + # run pipeline for prediction + predicts = np.empty(files.size) parameters = dict(pdf_with_text_layer="auto", pages="1:1") - result_item = OrderedDict() + times_correct, times_incorrect = [], [] + + count = min(max_eval_pdf, len(files)) + + for i, file_path in enumerate(tqdm(files[:count])): + file_name = file_path.split("/")[-1] + + time_b = time() + r = send_file(host=host, file_name=file_name, file_path=file_path, parameters=parameters) + time_eval = time() - time_b + + if labels[i] == 0: + times_correct.append(time_eval) + else: + 
times_incorrect.append(time_eval) + + predicts[i] = 3 # "failed" not handling + for warning in r["warnings"]: + if "has incorrect textual layer" in warning: + predicts[i] = 1 # "incorrect" + if "has a correct textual layer" in warning: + predicts[i] = 0 # "correct" - incorrect_tl_result = errors_param_for_text_layer(benchmark_data_dir, " incorrect ", "data_correct_text_layer", parameters) - result_item["percentage_of_guessed_correct_tl"] = 1 - incorrect_tl_result.total_incorrect_files / incorrect_tl_result.total_file_size - result_item["list_of_file_with_incorrect_tl"] = incorrect_tl_result.failed + if predicts[i] != labels[i]: + failed_corrected_pdfs.append(file_name) if labels[i] == 0 else failed_incorrected_pdfs.append(file_name) - correct_tl_result = errors_param_for_text_layer(benchmark_data_dir, " correct ", "data_incorrect_text_layer", parameters) - result_item["percentage_of_guessed_incorrect_tl"] = 1 - correct_tl_result.total_incorrect_files / correct_tl_result.total_file_size - result_item["list_of_file_with_correct_tl"] = correct_tl_result.failed - result["guessing_the_correctness_of_the_text"] = result_item + labels, predicts = labels[:count], predicts[:count] + b_accuracy = balanced_accuracy_score(labels, predicts) + accuracy = accuracy_score(labels, predicts) + w_avg = precision_recall_fscore_support(labels, predicts, average="weighted") + avg = precision_recall_fscore_support(labels, predicts, average=None, labels=[0, 1]) + + output = f"Version = {requests.get(host + '/version').text}\n\n" + + output += f"--- Balanced Accuracy --- = {b_accuracy}\n" + output += f"--- Accuracy --- = {accuracy}\n" + output += f"--- Weighted --- Precision = {w_avg[0]}, Recall={w_avg[1]}, F1={w_avg[2]}\n" + output += f"--- Class corrected --- : Precision = {avg[0][0]}, Recall={avg[1][0]}, F1={avg[2][0]}\n" + output += f"--- Class incorrected --- : Precision = {avg[0][1]}, Recall={avg[1][1]}, F1={avg[2][1]}\n" + + output += f"--- AVG Time corrected pdfs --- = {np.mean(times_correct)}\n" + output += f"--- AVG Time incorrected pdfs --- = {np.mean(times_incorrect)}\n" + output += f"--- AVG Time all pdfs --- = {np.mean(times_correct + times_incorrect)}\n" + + output += "\n\n--- Failed corrected pdfs --- : \n" + '\n'.join(failed_corrected_pdfs) # noqa + output += "\n\n--- Failed incorrected pdfs --- : \n" + '\n'.join(failed_incorrected_pdfs) # noqa + + print(output) with open(path_result, "w") as file_out: - json.dump(obj=result, fp=file_out, indent=4, ensure_ascii=False) + file_out.write(output) print(f"Save result in {path_result}") + + +if __name__ == "__main__": + get_metrics() From e4ec06b51754cd4c2548ad03bc86a48392ba31fa Mon Sep 17 00:00:00 2001 From: "Zykina (Bogatenkova) Anastasiya" Date: Fri, 20 Dec 2024 17:14:36 +0300 Subject: [PATCH 2/4] TLDR-791 add handling of client disconnection (#511) --- dedoc/api/api_utils.py | 24 ++-- dedoc/api/cancellation.py | 34 ++++++ dedoc/api/dedoc_api.py | 40 +++--- dedoc/api/process_handler.py | 115 ++++++++++++++++++ dedoc/common/exceptions/bad_file_error.py | 6 +- .../common/exceptions/bad_parameters_error.py | 4 - dedoc/common/exceptions/conversion_error.py | 6 +- dedoc/common/exceptions/dedoc_error.py | 17 ++- .../common/exceptions/java_not_found_error.py | 6 +- dedoc/common/exceptions/minio_error.py | 19 --- dedoc/common/exceptions/missing_file_error.py | 4 - dedoc/common/exceptions/recognize_error.py | 6 +- .../exceptions/structure_extractor_error.py | 4 - dedoc/common/exceptions/tabby_pdf_error.py | 6 +- .../dedoc_using_patterns_tutorial.py | 4 +- 
docs/source/tutorials/using_patterns.rst | 2 +- resources/benchmarks/time_benchmark.csv | 24 ++-- scripts/benchmark.py | 5 +- tests/api_tests/test_api_misc_main.py | 22 ++++ 19 files changed, 233 insertions(+), 115 deletions(-) create mode 100644 dedoc/api/cancellation.py create mode 100644 dedoc/api/process_handler.py delete mode 100644 dedoc/common/exceptions/minio_error.py diff --git a/dedoc/api/api_utils.py b/dedoc/api/api_utils.py index ad91f2d8..e4e30201 100644 --- a/dedoc/api/api_utils.py +++ b/dedoc/api/api_utils.py @@ -1,5 +1,6 @@ from typing import Dict, Iterator, List, Optional, Set +from dedoc.api.schema import LineMetadata, ParsedDocument, Table, TreeNode from dedoc.data_structures.concrete_annotations.attach_annotation import AttachAnnotation from dedoc.data_structures.concrete_annotations.bold_annotation import BoldAnnotation from dedoc.data_structures.concrete_annotations.italic_annotation import ItalicAnnotation @@ -10,10 +11,6 @@ from dedoc.data_structures.concrete_annotations.table_annotation import TableAnnotation from dedoc.data_structures.concrete_annotations.underlined_annotation import UnderlinedAnnotation from dedoc.data_structures.hierarchy_level import HierarchyLevel -from dedoc.data_structures.line_metadata import LineMetadata -from dedoc.data_structures.parsed_document import ParsedDocument -from dedoc.data_structures.table import Table -from dedoc.data_structures.tree_node import TreeNode from dedoc.extensions import converted_mimes, recognized_mimes @@ -39,7 +36,7 @@ def _node2tree(paragraph: TreeNode, depth: int, depths: Set[int] = None) -> str: space = "".join(space) node_result = [] - node_result.append(f" {space} {paragraph.metadata.hierarchy_level.line_type} {paragraph.node_id} ") + node_result.append(f" {space} {paragraph.metadata.paragraph_type} {paragraph.node_id} ") for text in __prettify_text(paragraph.text): space = [space_symbol] * 4 * (depth - 1) + 4 * [space_symbol] space = "".join(space) @@ -98,7 +95,7 @@ def json2tree(paragraph: TreeNode) -> str: depths = {d for d in depths if d <= depth} space = [space_symbol] * 4 * (depth - 1) + 4 * ["-"] space = __add_vertical_line(depths, space) - node_result.append(f"

{space} {node.metadata.hierarchy_level.line_type} {node.node_id}
") + node_result.append(f"
{space} {node.metadata.paragraph_type} {node.node_id}
") for text in __prettify_text(node.text): space = [space_symbol] * 4 * (depth - 1) + 4 * [space_symbol] space = __add_vertical_line(depths, space) @@ -136,14 +133,14 @@ def json2html(text: str, ptext = __annotations2html(paragraph=paragraph, table2id=table2id, attach2id=attach2id, tabs=tabs) - if paragraph.metadata.hierarchy_level.line_type in [HierarchyLevel.header, HierarchyLevel.root]: + if paragraph.metadata.paragraph_type in [HierarchyLevel.header, HierarchyLevel.root]: ptext = f"{ptext.strip()}" - elif paragraph.metadata.hierarchy_level.line_type == HierarchyLevel.list_item: + elif paragraph.metadata.paragraph_type == HierarchyLevel.list_item: ptext = f"{ptext.strip()}" else: ptext = ptext.strip() - ptext = f'

{" " * tabs} {ptext} id = {paragraph.node_id} ; type = {paragraph.metadata.hierarchy_level.line_type}

' + ptext = f'

{" " * tabs} {ptext} id = {paragraph.node_id} ; type = {paragraph.metadata.paragraph_type}

' if hasattr(paragraph.metadata, "uid"): ptext = f'
{ptext}
' text += ptext @@ -259,11 +256,10 @@ def table2html(table: Table, table2id: Dict[str, int]) -> str: text += ' style="display: none" ' cell_node = TreeNode( node_id="0", - text=cell.get_text(), - annotations=cell.get_annotations(), - metadata=LineMetadata(page_id=table.metadata.page_id, line_id=0), - subparagraphs=[], - parent=None + text="\n".join([line.text for line in cell.lines]), + annotations=cell.lines[0].annotations if cell.lines else [], + metadata=LineMetadata(page_id=0, line_id=0, paragraph_type=HierarchyLevel.raw_text), + subparagraphs=[] ) text += f' colspan="{cell.colspan}" rowspan="{cell.rowspan}">{__annotations2html(cell_node, {}, {})}\n' diff --git a/dedoc/api/cancellation.py b/dedoc/api/cancellation.py new file mode 100644 index 00000000..e9a6ddbb --- /dev/null +++ b/dedoc/api/cancellation.py @@ -0,0 +1,34 @@ +import logging +from contextlib import asynccontextmanager + +from anyio import create_task_group +from fastapi import Request + + +@asynccontextmanager +async def cancel_on_disconnect(request: Request, logger: logging.Logger) -> None: + """ + Async context manager for async code that needs to be cancelled if client disconnects prematurely. + The client disconnect is monitored through the Request object. + + Source: https://github.com/dorinclisu/runner-with-api + See discussion: https://github.com/fastapi/fastapi/discussions/8805 + """ + async with create_task_group() as task_group: + async def watch_disconnect() -> None: + while True: + message = await request.receive() + + if message["type"] == "http.disconnect": + client = f"{request.client.host}:{request.client.port}" if request.client else "-:-" + logger.warning(f"{client} - `{request.method} {request.url.path}` 499 DISCONNECTED") + + task_group.cancel_scope.cancel() + break + + task_group.start_soon(watch_disconnect) + + try: + yield + finally: + task_group.cancel_scope.cancel() diff --git a/dedoc/api/dedoc_api.py b/dedoc/api/dedoc_api.py index 1458ffd3..545b8efc 100644 --- a/dedoc/api/dedoc_api.py +++ b/dedoc/api/dedoc_api.py @@ -1,10 +1,8 @@ -import base64 import dataclasses import importlib import json import os import tempfile -import traceback from typing import Optional from fastapi import Depends, FastAPI, File, Request, Response, UploadFile @@ -15,24 +13,23 @@ import dedoc.version from dedoc.api.api_args import QueryParameters from dedoc.api.api_utils import json2collapsed_tree, json2html, json2tree, json2txt +from dedoc.api.process_handler import ProcessHandler from dedoc.api.schema.parsed_document import ParsedDocument from dedoc.common.exceptions.dedoc_error import DedocError from dedoc.common.exceptions.missing_file_error import MissingFileError from dedoc.config import get_config -from dedoc.dedoc_manager import DedocManager from dedoc.utils.utils import save_upload_file config = get_config() +logger = config["logger"] PORT = config["api_port"] static_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "web") static_files_dirs = config.get("static_files_dirs") app = FastAPI() app.mount("/web", StaticFiles(directory=config.get("static_path", static_path)), name="web") - module_api_args = importlib.import_module(config["import_path_init_api_args"]) -logger = config["logger"] -manager = DedocManager(config=config) +process_handler = ProcessHandler(logger=logger) @app.get("/") @@ -62,27 +59,20 @@ def _get_static_file_path(request: Request) -> str: return os.path.abspath(os.path.join(directory, file)) -def __add_base64_info_to_attachments(document_tree: ParsedDocument, attachments_dir: str) 
-> None: - for attachment in document_tree.attachments: - with open(os.path.join(attachments_dir, attachment.metadata.temporary_file_name), "rb") as attachment_file: - attachment.metadata.add_attribute("base64", base64.b64encode(attachment_file.read()).decode("utf-8")) - - @app.post("/upload", response_model=ParsedDocument) -async def upload(file: UploadFile = File(...), query_params: QueryParameters = Depends()) -> Response: +async def upload(request: Request, file: UploadFile = File(...), query_params: QueryParameters = Depends()) -> Response: parameters = dataclasses.asdict(query_params) if not file or file.filename == "": raise MissingFileError("Error: Missing content in request_post file parameter", version=dedoc.version.__version__) - return_format = str(parameters.get("return_format", "json")).lower() - with tempfile.TemporaryDirectory() as tmpdir: file_path = save_upload_file(file, tmpdir) - document_tree = manager.parse(file_path, parameters={**dict(parameters), "attachments_dir": tmpdir}) + document_tree = await process_handler.handle(request=request, parameters=parameters, file_path=file_path, tmpdir=tmpdir) - if return_format == "html": - __add_base64_info_to_attachments(document_tree, tmpdir) + if document_tree is None: + return JSONResponse(status_code=499, content={}) + return_format = str(parameters.get("return_format", "json")).lower() if return_format == "html": html_content = json2html( text="", @@ -102,24 +92,25 @@ async def upload(file: UploadFile = File(...), query_params: QueryParameters = D return HTMLResponse(content=html_content) if return_format == "ujson": - return UJSONResponse(content=document_tree.to_api_schema().model_dump()) + return UJSONResponse(content=document_tree.model_dump()) if return_format == "collapsed_tree": html_content = json2collapsed_tree(paragraph=document_tree.content.structure) return HTMLResponse(content=html_content) if return_format == "pretty_json": - return PlainTextResponse(content=json.dumps(document_tree.to_api_schema().model_dump(), ensure_ascii=False, indent=2)) + return PlainTextResponse(content=json.dumps(document_tree.model_dump(), ensure_ascii=False, indent=2)) logger.info(f"Send result. 
File {file.filename} with parameters {parameters}") - return ORJSONResponse(content=document_tree.to_api_schema().model_dump()) + return ORJSONResponse(content=document_tree.model_dump()) @app.get("/upload_example") -async def upload_example(file_name: str, return_format: Optional[str] = None) -> Response: +async def upload_example(request: Request, file_name: str, return_format: Optional[str] = None) -> Response: file_path = os.path.join(static_path, "examples", file_name) parameters = {} if return_format is None else {"return_format": return_format} - document_tree = manager.parse(file_path, parameters=parameters) + with tempfile.TemporaryDirectory() as tmpdir: + document_tree = await process_handler.handle(request=request, parameters=parameters, file_path=file_path, tmpdir=tmpdir) if return_format == "html": html_page = json2html( @@ -130,12 +121,11 @@ async def upload_example(file_name: str, return_format: Optional[str] = None) -> tabs=0 ) return HTMLResponse(content=html_page) - return ORJSONResponse(content=document_tree.to_api_schema().model_dump(), status_code=200) + return ORJSONResponse(content=document_tree.model_dump(), status_code=200) @app.exception_handler(DedocError) async def exception_handler(request: Request, exc: DedocError) -> Response: - logger.error(f"Exception {exc}\n{traceback.format_exc()}") result = {"message": exc.msg} if exc.filename: result["file_name"] = exc.filename diff --git a/dedoc/api/process_handler.py b/dedoc/api/process_handler.py new file mode 100644 index 00000000..2868357d --- /dev/null +++ b/dedoc/api/process_handler.py @@ -0,0 +1,115 @@ +import asyncio +import base64 +import logging +import os +import pickle +import signal +import traceback +from multiprocessing import Process, Queue +from typing import Optional +from urllib.request import Request + +from anyio import get_cancelled_exc_class + +from dedoc.api.cancellation import cancel_on_disconnect +from dedoc.api.schema import ParsedDocument +from dedoc.common.exceptions.dedoc_error import DedocError +from dedoc.config import get_config +from dedoc.dedoc_manager import DedocManager + + +class ProcessHandler: + """ + Class for file parsing by DedocManager with support for client disconnection. + If client disconnects during file parsing, the process of parsing is fully terminated and API is available to receive new connections. + + Handler uses the following algorithm: + 1. Master process is used for checking current connection (client disconnect) + 2. Child process is working on the background and waiting for the input file in the input_queue + 3. Master process calls the child process for parsing and transfers data through the input_queue + 4. Child process is parsing file using DedocManager + 5. The result of parsing is transferred to the master process through the output_queue + 6. If client disconnects, the child process is terminated. The new child process with queues will start with the new request + """ + def __init__(self, logger: logging.Logger) -> None: + self.input_queue = Queue() + self.output_queue = Queue() + self.logger = logger + self.process = Process(target=self.__parse_file, args=[self.input_queue, self.output_queue]) + self.process.start() + + async def handle(self, request: Request, parameters: dict, file_path: str, tmpdir: str) -> Optional[ParsedDocument]: + """ + Handle request in a separate process. + Checks for client disconnection and terminate the child process if client disconnected. 
+ """ + if self.process is None: + self.logger.info("Initialization of a new parsing process") + self.__init__(logger=self.logger) + + self.logger.info("Putting file to the input queue") + self.input_queue.put(pickle.dumps((parameters, file_path, tmpdir)), block=True) + + loop = asyncio.get_running_loop() + async with cancel_on_disconnect(request, self.logger): + try: + future = loop.run_in_executor(None, self.output_queue.get) + result = await future + except get_cancelled_exc_class(): + self.logger.warning("Terminating the parsing process") + if self.process is not None: + self.process.terminate() + self.process = None + future.cancel(DedocError) + return None + + result = pickle.loads(result) + if isinstance(result, ParsedDocument): + self.logger.info("Got the result from the output queue") + return result + + raise DedocError.from_dict(result) + + def __parse_file(self, input_queue: Queue, output_queue: Queue) -> None: + """ + Function for file parsing in a separate (child) process. + It's a background process, i.e. it is waiting for a task in the input queue. + The result of parsing is returned in the output queue. + + Operations with `signal` are used for saving master process while killing child process. + See the issue for more details: https://github.com/fastapi/fastapi/issues/1487 + """ + signal.set_wakeup_fd(-1) + signal.signal(signal.SIGTERM, signal.SIG_DFL) + signal.signal(signal.SIGINT, signal.SIG_DFL) + + manager = DedocManager(config=get_config()) + manager.logger.info("Parsing process is waiting for the task in the input queue") + + while True: + file_path = None + try: + parameters, file_path, tmp_dir = pickle.loads(input_queue.get(block=True)) + manager.logger.info("Parsing process got task from the input queue") + return_format = str(parameters.get("return_format", "json")).lower() + document_tree = manager.parse(file_path, parameters={**dict(parameters), "attachments_dir": tmp_dir}) + + if return_format == "html": + self.__add_base64_info_to_attachments(document_tree, tmp_dir) + + output_queue.put(pickle.dumps(document_tree.to_api_schema()), block=True) + manager.logger.info("Parsing process put task to the output queue") + except DedocError as e: + tb = traceback.format_exc() + manager.logger.error(f"Exception {e}: {e.msg_api}\n{tb}") + output_queue.put(pickle.dumps(e.__dict__), block=True) + except Exception as e: + exc_message = f"Exception {e}\n{traceback.format_exc()}" + filename = "" if file_path is None else os.path.basename(file_path) + manager.logger.error(exc_message) + output_queue.put(pickle.dumps({"msg": exc_message, "filename": filename}), block=True) + + def __add_base64_info_to_attachments(self, document_tree: ParsedDocument, attachments_dir: str) -> None: + for attachment in document_tree.attachments: + with open(os.path.join(attachments_dir, attachment.metadata.temporary_file_name), "rb") as attachment_file: + attachment.metadata.add_attribute("base64", base64.b64encode(attachment_file.read()).decode("utf-8")) diff --git a/dedoc/common/exceptions/bad_file_error.py b/dedoc/common/exceptions/bad_file_error.py index 4b800c9d..2c1176bc 100644 --- a/dedoc/common/exceptions/bad_file_error.py +++ b/dedoc/common/exceptions/bad_file_error.py @@ -9,11 +9,7 @@ class BadFileFormatError(DedocError): """ def __init__(self, msg: str, msg_api: Optional[str] = None, filename: Optional[str] = None, version: Optional[str] = None) -> None: - super(BadFileFormatError, self).__init__(msg_api=msg_api, msg=msg, filename=filename, version=version) + super(BadFileFormatError, 
self).__init__(msg_api=msg_api, msg=msg, filename=filename, version=version, code=415) def __str__(self) -> str: return f"BadFileFormatError({self.msg})" - - @property - def code(self) -> int: - return 415 diff --git a/dedoc/common/exceptions/bad_parameters_error.py b/dedoc/common/exceptions/bad_parameters_error.py index dc8c0aa9..98e81d29 100644 --- a/dedoc/common/exceptions/bad_parameters_error.py +++ b/dedoc/common/exceptions/bad_parameters_error.py @@ -14,7 +14,3 @@ def __init__(self, msg: str, msg_api: Optional[str] = None, filename: Optional[s def __str__(self) -> str: return f"BadParametersError({self.msg})" - - @property - def code(self) -> int: - return 400 diff --git a/dedoc/common/exceptions/conversion_error.py b/dedoc/common/exceptions/conversion_error.py index f95207b3..70551230 100644 --- a/dedoc/common/exceptions/conversion_error.py +++ b/dedoc/common/exceptions/conversion_error.py @@ -9,11 +9,7 @@ class ConversionError(DedocError): """ def __init__(self, msg: str, msg_api: Optional[str] = None, filename: Optional[str] = None, version: Optional[str] = None) -> None: - super(ConversionError, self).__init__(msg_api=msg_api, msg=msg, filename=filename, version=version) + super(ConversionError, self).__init__(msg_api=msg_api, msg=msg, filename=filename, version=version, code=415) def __str__(self) -> str: return f"ConversionError({self.msg})" - - @property - def code(self) -> int: - return 415 diff --git a/dedoc/common/exceptions/dedoc_error.py b/dedoc/common/exceptions/dedoc_error.py index f91c8bd0..9c793256 100644 --- a/dedoc/common/exceptions/dedoc_error.py +++ b/dedoc/common/exceptions/dedoc_error.py @@ -9,17 +9,26 @@ def __init__(self, msg_api: Optional[str] = None, filename: Optional[str] = None, version: Optional[str] = None, - metadata: Optional[dict] = None) -> None: + metadata: Optional[dict] = None, + code: Optional[int] = None) -> None: super(DedocError, self).__init__() self.msg = msg self.msg_api = msg if msg_api is None else msg_api self.filename = filename self.version = version if version is not None else dedoc.version.__version__ self.metadata = metadata + self.code = 400 if code is None else code def __str__(self) -> str: return f"DedocError({self.msg})" - @property - def code(self) -> int: - return 400 + @staticmethod + def from_dict(error_dict: dict) -> "DedocError": + return DedocError( + msg=error_dict.get("msg", ""), + msg_api=error_dict.get("msg_api", ""), + filename=error_dict.get("filename", ""), + version=error_dict.get("version", dedoc.version.__version__), + metadata=error_dict.get("metadata", {}), + code=error_dict.get("code", 500) + ) diff --git a/dedoc/common/exceptions/java_not_found_error.py b/dedoc/common/exceptions/java_not_found_error.py index c6d96384..105556ba 100644 --- a/dedoc/common/exceptions/java_not_found_error.py +++ b/dedoc/common/exceptions/java_not_found_error.py @@ -9,11 +9,7 @@ class JavaNotFoundError(DedocError): """ def __init__(self, msg: str, msg_api: Optional[str] = None, filename: Optional[str] = None, version: Optional[str] = None) -> None: - super(JavaNotFoundError, self).__init__(msg_api=msg_api, msg=msg, filename=filename, version=version) + super(JavaNotFoundError, self).__init__(msg_api=msg_api, msg=msg, filename=filename, version=version, code=500) def __str__(self) -> str: return f"JavaNotFoundError({self.msg})" - - @property - def code(self) -> int: - return 500 diff --git a/dedoc/common/exceptions/minio_error.py b/dedoc/common/exceptions/minio_error.py deleted file mode 100644 index 6d43c64f..00000000 --- 
a/dedoc/common/exceptions/minio_error.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import Optional - -from dedoc.common.exceptions.dedoc_error import DedocError - - -class MinioError(DedocError): - """ - Raise if there is no file in minio - """ - - def __init__(self, msg: str, msg_api: Optional[str] = None, filename: Optional[str] = None, version: Optional[str] = None) -> None: - super(MinioError, self).__init__(msg_api=msg_api, msg=msg, filename=filename, version=version) - - def __str__(self) -> str: - return f"MinioError({self.msg})" - - @property - def code(self) -> int: - return 404 diff --git a/dedoc/common/exceptions/missing_file_error.py b/dedoc/common/exceptions/missing_file_error.py index 7bc861e9..1272376f 100644 --- a/dedoc/common/exceptions/missing_file_error.py +++ b/dedoc/common/exceptions/missing_file_error.py @@ -13,7 +13,3 @@ def __init__(self, msg: str, msg_api: Optional[str] = None, filename: Optional[s def __str__(self) -> str: return f"MissingFileError({self.msg})" - - @property - def code(self) -> int: - return 400 diff --git a/dedoc/common/exceptions/recognize_error.py b/dedoc/common/exceptions/recognize_error.py index 05c388ce..767cba6a 100644 --- a/dedoc/common/exceptions/recognize_error.py +++ b/dedoc/common/exceptions/recognize_error.py @@ -6,11 +6,7 @@ class RecognizeError(DedocError): def __init__(self, msg: str, msg_api: Optional[str] = None, filename: Optional[str] = None, version: Optional[str] = None) -> None: - super(RecognizeError, self).__init__(msg_api=msg_api, msg=msg, filename=filename, version=version) + super(RecognizeError, self).__init__(msg_api=msg_api, msg=msg, filename=filename, version=version, code=500) def __str__(self) -> str: return f"RecognizeError({self.msg})" - - @property - def code(self) -> int: - return 500 diff --git a/dedoc/common/exceptions/structure_extractor_error.py b/dedoc/common/exceptions/structure_extractor_error.py index 1bb9bd00..803d4f1c 100644 --- a/dedoc/common/exceptions/structure_extractor_error.py +++ b/dedoc/common/exceptions/structure_extractor_error.py @@ -13,7 +13,3 @@ def __init__(self, msg: str, msg_api: Optional[str] = None, filename: Optional[s def __str__(self) -> str: return f"StructureExtractorError({self.msg})" - - @property - def code(self) -> int: - return 400 diff --git a/dedoc/common/exceptions/tabby_pdf_error.py b/dedoc/common/exceptions/tabby_pdf_error.py index eff2ec8d..c3380be1 100644 --- a/dedoc/common/exceptions/tabby_pdf_error.py +++ b/dedoc/common/exceptions/tabby_pdf_error.py @@ -9,11 +9,7 @@ class TabbyPdfError(DedocError): """ def __init__(self, msg: str, msg_api: Optional[str] = None, filename: Optional[str] = None, version: Optional[str] = None) -> None: - super(TabbyPdfError, self).__init__(msg_api=msg_api, msg=msg, filename=filename, version=version) + super(TabbyPdfError, self).__init__(msg_api=msg_api, msg=msg, filename=filename, version=version, code=500) def __str__(self) -> str: return f"TabbyPdfError({self.msg})" - - @property - def code(self) -> int: - return 500 diff --git a/docs/source/_static/code_examples/dedoc_using_patterns_tutorial.py b/docs/source/_static/code_examples/dedoc_using_patterns_tutorial.py index 5c8e81b9..eb44051b 100644 --- a/docs/source/_static/code_examples/dedoc_using_patterns_tutorial.py +++ b/docs/source/_static/code_examples/dedoc_using_patterns_tutorial.py @@ -34,7 +34,7 @@ docx_document = structure_extractor.extract(document=docx_document, parameters={"patterns": patterns}) docx_document.metadata = 
docx_metadata_extractor.extract(file_path=docx_file_path) -docx_parsed_document = structure_constructor.construct(document=docx_document) +docx_parsed_document = structure_constructor.construct(document=docx_document).to_api_schema() html = json2html( paragraph=docx_parsed_document.content.structure, attachments=docx_parsed_document.attachments, @@ -46,7 +46,7 @@ def print_document_tree(document: UnstructuredDocument, patterns: List[AbstractPattern]) -> None: document = structure_extractor.extract(document=document, parameters={"patterns": patterns}) - parsed_document = structure_constructor.construct(document=document) + parsed_document = structure_constructor.construct(document=document).to_api_schema() html = json2html(paragraph=parsed_document.content.structure, attachments=parsed_document.attachments, tables=parsed_document.content.tables, text="") print(f"\n\nDocument tree\n{html2text.html2text(html)}") diff --git a/docs/source/tutorials/using_patterns.rst b/docs/source/tutorials/using_patterns.rst index e2ea2d71..dc229423 100644 --- a/docs/source/tutorials/using_patterns.rst +++ b/docs/source/tutorials/using_patterns.rst @@ -91,7 +91,7 @@ which applies patterns if lines match them, else line becomes simple raw text li :language: python :lines: 30-37 -Let's see the resulting tree. In the code below we use an auxiliary function to convert :class:`~dedoc.data_structures.ParsedDocument` +Let's see the resulting tree. In the code below we use an auxiliary function to convert :class:`~dedoc.api.schema.ParsedDocument` to the HTML representation and print it: .. literalinclude:: ../_static/code_examples/dedoc_using_patterns_tutorial.py diff --git a/resources/benchmarks/time_benchmark.csv b/resources/benchmarks/time_benchmark.csv index 9be87ffa..1e9467a9 100644 --- a/resources/benchmarks/time_benchmark.csv +++ b/resources/benchmarks/time_benchmark.csv @@ -1,13 +1,13 @@ ,Dataset,total_file_size,total_files,total_pages,total_time_raw,throughput_raw,mean_time_on_file_raw,mean_time_cpu_on_page_raw,total_time_indp_cpu,throughput_indp_cpu,mean_time_on_file_indp_cpu,mean_time_cpu_on_page_indp_cpu,cpu_performance,version -0,images,105240044,259,259,819.3893718719482,128437.16017401138,3.1636655284631208,3.1636655284631208,845.0002507880153,124544.39380563155,3.2625492308417576,3.262549230841758,1.0312560545636043,2.1 -0,law_html,215921201,1526,1526,227.0532796382904,950971.5135759128,0.14878982938289018,0.14878982938289018,234.1500693355101,922148.7809623912,0.15344041240859116,0.15344041240859116,1.0312560545636043,2.1 -0,other_html,215921201,1526,1526,156.9773073196411,1375493.0867831479,0.1028684844820715,0.1028684844820715,161.8837986024715,1333803.6472088536,0.10608374744591842,0.1060837474459184,1.0312560545636043,2.1 -0,txt,2483851,999,999,13.047960042953491,190363.16725551253,0.013061021064017509,0.013061021064017509,13.455787793999773,184593.5026641549,0.013469257051050823,0.013469257051050825,1.0312560545636043,2.1 -0,pdf_text_layer_true,109643533,33,445,417.5641739368439,262578.8797115134,12.653459816267997,1.0459708427522103,430.6155825412202,254620.4490626033,13.048957046703642,1.0786637644852126,1.0312560545636043,2.1 -0,pdf_text_layer_auto,109643533,33,445,744.6476347446442,147242.16916045017,22.565079840746794,1.9358688088909384,767.9223818468816,142779.44697523108,23.270375207481262,1.9963764300096132,1.0312560545636043,2.1 
-0,pdf_text_layer_auto_tabby,109643533,33,445,861.5465660095215,127263.61792357055,26.107471697258227,2.408536994270351,888.475112485801,123406.42012271588,26.923488257145486,2.4838183579817246,1.0312560545636043,2.1 -0,pdf_text_layer_false,109643533,33,445,1923.4744081497192,57002.85511231277,58.28710327726422,4.837624405643553,1983.5946292025433,55275.171340869965,60.10892815765283,4.988829458024572,1.0312560545636043,2.1 -0,pdf_text_layer_tabby,109643533,33,445,459.48560762405396,238622.3445973723,13.923806291637998,1.2937336014756313,473.84731484714223,231390.00594604985,14.359009540822491,1.334170609514122,1.0312560545636043,2.1 -0,docx,417727,22,22,16.942837953567505,24655.078514284138,0.770128997889432,0.770128997889432,17.472404221106515,23907.814557963888,0.794200191868478,0.794200191868478,1.0312560545636043,2.1 -0,pdf,6086144,18,117,375.61194705963135,16203.275874592393,20.86733039220174,3.0367271868588284,387.35209457166883,15712.175267130062,21.519560809537158,3.131643297506068,1.0312560545636043,2.1 -0,pdf_tables,16536264,2,267,1197.7023212909698,13806.656049706928,598.8511606454849,4.039958413717207,1235.137770396196,13388.193929731136,617.568885198098,4.166231574331044,1.0312560545636043,2.1 +0,images,105240044,259,259,780.3763222694397,134858.06910946214,3.0130359933183,3.0130359933183,1066.0429167915163,98720.2694585152,4.115995817727862,4.115995817727862,1.366062611550437,2.3.1 +0,law_html,215921201,1526,1526,204.2208013534546,1057292.8887214332,0.13382752382270943,0.13382752382270943,278.9784012298232,773971.0316216326,0.18281677669057877,0.18281677669057877,1.366062611550437,2.3.1 +0,other_html,215921201,1526,1526,152.16186046600342,1419023.139824463,0.0997128836605527,0.0997128836605527,207.86262848656185,1038768.7415102572,0.13621404225855954,0.13621404225855951,1.366062611550437,2.3.1 +0,txt,2483851,999,999,12.656875133514404,196245.20063589464,0.012669544678192597,0.012669544678192597,17.290083898956475,143657.5446663917,0.01730739129024672,0.01730739129024672,1.366062611550437,2.3.1 +0,pdf_text_layer_true,109643533,33,445,294.70041608810425,372050.825225916,8.930315639033463,0.7666412830448923,402.57922002631614,272352.6887275323,12.199370303827761,1.0472799932386834,1.366062611550437,2.3.1 +0,pdf_text_layer_auto,109643533,33,445,715.7886617183685,153178.6389809286,21.69056550661723,1.9423069545744724,977.8121285451869,112131.49213349436,29.63067056197536,2.65331291079858,1.366062611550437,2.3.1 +0,pdf_text_layer_auto_tabby,109643533,33,445,844.7789170742035,129789.61806923167,25.59936112346071,2.380779043078811,1154.0208936411366,95010.00684143213,34.97033011033747,3.2522932371127915,1.366062611550437,2.3.1 +0,pdf_text_layer_false,109643533,33,445,1591.9220836162567,68874.93686307219,48.240063139886566,3.9471288925826884,2174.665238929637,50418.57985184248,65.89894663423142,5.392025203127692,1.366062611550437,2.3.1 +0,pdf_text_layer_tabby,109643533,33,445,421.8361530303955,259919.71577670728,12.782913728193803,1.1935813540785523,576.2545968550919,190269.25528816486,17.46226051076036,1.630506861650454,1.366062611550437,2.3.1 +0,docx,417727,22,22,17.311132431030273,24130.541526631885,0.7868696559559215,0.7868696559559215,23.64809077762868,17664.30127184617,1.07491321716494,1.07491321716494,1.366062611550437,2.3.1 +0,pdf,6086144,18,117,310.7921574115753,19582.68204284271,17.26623096730974,2.519494602322346,424.5615462030511,14335.127743974337,23.58675256683617,3.441787376235694,1.366062611550437,2.3.1 
+0,pdf_tables,16536264,2,267,1083.6798040866852,15259.363455551895,541.8399020433426,3.6198095974726074,1480.3744632551231,11170.325083586768,740.1872316275616,4.944886552038766,1.366062611550437,2.3.1 diff --git a/scripts/benchmark.py b/scripts/benchmark.py index a82b2131..39cfbce4 100644 --- a/scripts/benchmark.py +++ b/scripts/benchmark.py @@ -100,7 +100,10 @@ def get_times(spend_page_times: List, total_size: int, total_time: int, total_fi file_size = os.path.getsize(file_path) total_size += file_size time_start = time.time() - send_file(host=host, file_name=file, file_path=file_path, parameters=parameters) + try: + send_file(host=host, file_name=file, file_path=file_path, parameters=parameters) + except AssertionError as e: + print(f"Error on file {file_path}: {e}") time_finish = time.time() spend_file_time = time_finish - time_start pages = page_func(file_path) diff --git a/tests/api_tests/test_api_misc_main.py b/tests/api_tests/test_api_misc_main.py index cbc47976..550ebc59 100644 --- a/tests/api_tests/test_api_misc_main.py +++ b/tests/api_tests/test_api_misc_main.py @@ -1,7 +1,9 @@ import json import os +import time import requests +from requests import ReadTimeout from tests.api_tests.abstract_api_test import AbstractTestApiDocReader @@ -14,6 +16,26 @@ def __get_version(self) -> str: version = file.read().strip() return version + def test_cancellation(self) -> None: + file_name = "article.pdf" + start_time = time.time() + with open(self._get_abs_path(os.path.join("pdf_with_text_layer", file_name)), "rb") as file: + files = {"file": (file_name, file)} + parameters = dict(pdf_with_text_layer=False) + try: + requests.post(f"http://{self._get_host()}:{self._get_port()}/upload", files=files, data=parameters, timeout=1) + except ReadTimeout: + pass + + file_name = "example.txt" + with open(self._get_abs_path(os.path.join("txt", file_name)), "rb") as file: + files = {"file": (file_name, file)} + r = requests.post(f"http://{self._get_host()}:{self._get_port()}/upload", files=files, data={}, timeout=60) + + end_time = time.time() + self.assertLess(end_time - start_time, 60) + self.assertEqual(200, r.status_code) + def test_bin_file(self) -> None: file_name = "file.bin" result = self._send_request(file_name, expected_code=415) From 76a7f4a7ca58072a8fa1785271c117525d2a210c Mon Sep 17 00:00:00 2001 From: Andrey Mikhailov Date: Fri, 20 Dec 2024 22:51:35 +0800 Subject: [PATCH 3/4] TLDR-851 TLDR-861 Refactor table recognition (#508) Co-authored-by: Nasty Co-authored-by: Belyaeva Oksana --- dedoc/api/api_args.py | 3 - dedoc/api/web/index.html | 33 ++-- dedoc/data_structures/cell_with_meta.py | 5 +- .../pdf_reader/data_classes/tables/cell.py | 76 ++------ .../data_classes/tables/scantable.py | 89 ++-------- dedoc/readers/pdf_reader/pdf_base_reader.py | 22 +-- .../pdf_image_reader/pdf_image_reader.py | 2 - .../table_recognizer/cell_splitter.py | 47 ++--- .../split_last_hor_union_cells.py | 19 +- .../multipage_table_extractor.py | 36 ++-- .../onepage_table_extractor.py | 133 ++++---------- .../table_attribute_extractor.py | 143 +++++++-------- .../table_recognizer/table_recognizer.py | 38 +--- .../table_utils/accuracy_table_rec.py | 140 --------------- .../table_utils/img_processing.py | 8 +- .../table_recognizer/table_utils/utils.py | 25 ++- .../pdf_txtlayer_reader/pdf_tabby_reader.py | 42 +++-- .../pdf_txtlayer_reader.py | 6 +- .../tabbypdf/jars/ispras_tbl_extr.jar | Bin 164506 -> 164553 bytes dedoc/utils/parameter_utils.py | 17 -- docs/source/dedoc_api_usage/api.rst | 22 +-- 
docs/source/parameters/pdf_handling.rst | 24 --- .../test_api_format_pdf_tabby_reader.py | 4 +- .../test_api_misc_multipage_table.py | 15 +- .../test_api_module_table_recognizer.py | 16 -- tests/unit_tests/test_module_cell_splitter.py | 164 +++++++++--------- .../test_module_gost_frame_recognizer.py | 2 - .../unit_tests/test_module_table_detection.py | 76 ++++---- 28 files changed, 367 insertions(+), 840 deletions(-) delete mode 100644 dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_utils/accuracy_table_rec.py diff --git a/dedoc/api/api_args.py b/dedoc/api/api_args.py index d1f7d5cf..f2b9e7c4 100644 --- a/dedoc/api/api_args.py +++ b/dedoc/api/api_args.py @@ -22,9 +22,6 @@ class QueryParameters: # tables handling need_pdf_table_analysis: str = Form("true", enum=["true", "false"], description="Enable table recognition for pdf") table_type: str = Form("", description="Pipeline mode for table recognition") - orient_analysis_cells: str = Form("false", enum=["true", "false"], description="Enable analysis of rotated cells in table headers") - orient_cell_angle: str = Form("90", enum=["90", "270"], - description='Set cells orientation in table headers, "90" means 90 degrees counterclockwise cells rotation') # pdf handling pdf_with_text_layer: str = Form("auto_tabby", enum=["true", "false", "auto", "auto_tabby", "tabby"], diff --git a/dedoc/api/web/index.html b/dedoc/api/web/index.html index ede62117..5538878a 100644 --- a/dedoc/api/web/index.html +++ b/dedoc/api/web/index.html @@ -98,31 +98,9 @@

[The index.html hunk above lost its HTML markup during text extraction and cannot be restored verbatim. Recoverable content: the "Attachments handling" block is unchanged context; the separate "Tables handling" block (need_pdf_table_analysis, orient_analysis_cells, orient_cell_angle and the cell-orientation controls) is removed; need_pdf_table_analysis moves under the "PDF handling" block, whose summary line changes as follows:]
-            pdf_with_text_layer, fast_textual_layer_detection, language, pages, is_one_column_document, document_orientation, need_header_footer_analysis, need_binarization, need_gost_frame_analysis
+            pdf_with_text_layer, need_pdf_table_analysis, fast_textual_layer_detection, language, pages, is_one_column_document, document_orientation, need_header_footer_analysis, need_binarization, need_gost_frame_analysis
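As an illustrative aside (not part of the patch itself): a minimal sketch of how a client calls the /upload endpoint after this change, where table recognition is driven only by need_pdf_table_analysis (plus table_type) and the orientation parameters no longer exist. The host/port come from the benchmark script in this series and the file name is a placeholder; the form fields mirror QueryParameters in dedoc/api/api_args.py.

    import requests

    # Illustrative call against a locally running dedoc API (port 1231 as in scripts/benchmark_tl_correctness.py).
    # orient_analysis_cells / orient_cell_angle are gone; tables are toggled with need_pdf_table_analysis only.
    with open("example.pdf", "rb") as pdf_file:
        response = requests.post(
            "http://localhost:1231/upload",
            files={"file": ("example.pdf", pdf_file)},
            data={
                "pdf_with_text_layer": "auto_tabby",
                "need_pdf_table_analysis": "true",
                "return_format": "json",
            },
            timeout=60,
        )

    response.raise_for_status()
    document_tree = response.json()  # ParsedDocument as a JSON dict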
diff --git a/dedoc/data_structures/cell_with_meta.py b/dedoc/data_structures/cell_with_meta.py index d23cad1c..03ee0c67 100644 --- a/dedoc/data_structures/cell_with_meta.py +++ b/dedoc/data_structures/cell_with_meta.py @@ -47,9 +47,8 @@ def get_annotations(self) -> List[Annotation]: """ return LineWithMeta.join(lines=self.lines, delimiter="\n").annotations - @staticmethod - def create_from_cell(cell: "CellWithMeta") -> "CellWithMeta": - return CellWithMeta(lines=cell.lines, colspan=cell.colspan, rowspan=cell.rowspan, invisible=cell.invisible) + def __str__(self) -> str: + return f"CellWithMeta(cs={self.colspan}, rs={self.rowspan}, {self.get_text()})" def to_api_schema(self) -> ApiCellWithMeta: import numpy as np diff --git a/dedoc/readers/pdf_reader/data_classes/tables/cell.py b/dedoc/readers/pdf_reader/data_classes/tables/cell.py index 0d42dc37..d83e2b6c 100644 --- a/dedoc/readers/pdf_reader/data_classes/tables/cell.py +++ b/dedoc/readers/pdf_reader/data_classes/tables/cell.py @@ -1,8 +1,8 @@ +import copy from typing import List, Optional from dedocutils.data_structures import BBox -from dedoc.data_structures.annotation import Annotation from dedoc.data_structures.cell_with_meta import CellWithMeta from dedoc.data_structures.line_with_meta import LineWithMeta @@ -10,69 +10,37 @@ class Cell(CellWithMeta): @staticmethod - def copy_from(cell: "Cell", - x_top_left: Optional[int] = None, - x_bottom_right: Optional[int] = None, - y_top_left: Optional[int] = None, - y_bottom_right: Optional[int] = None) -> "Cell": - x_top_left = cell.x_top_left if x_top_left is None else x_top_left - x_bottom_right = cell.x_bottom_right if x_bottom_right is None else x_bottom_right - y_top_left = cell.y_top_left if y_top_left is None else y_top_left - y_bottom_right = cell.y_bottom_right if y_bottom_right is None else y_bottom_right - return Cell(x_top_left=x_top_left, - x_bottom_right=x_bottom_right, - y_top_left=y_top_left, - y_bottom_right=y_bottom_right, - id_con=cell.id_con, - lines=cell.lines, - is_attribute=cell.is_attribute, - is_attribute_required=cell.is_attribute_required, - rotated_angle=cell.rotated_angle, - uid=cell.cell_uid, - contour_coord=cell.con_coord) + def copy_from(cell: "Cell", bbox: Optional[BBox] = None) -> "Cell": + copy_cell = copy.deepcopy(cell) + if bbox: + copy_cell.bbox = bbox + + return copy_cell def shift(self, shift_x: int, shift_y: int, image_width: int, image_height: int) -> None: if self.lines: for line in self.lines: line.shift(shift_x=shift_x, shift_y=shift_y, image_width=image_width, image_height=image_height) - self.x_top_left += shift_x - self.x_bottom_right += shift_x - self.y_top_left += shift_y - self.y_bottom_right += shift_y - if self.con_coord: - self.con_coord.shift(shift_x=shift_x, shift_y=shift_y) - def __init__(self, x_top_left: int, x_bottom_right: int, y_top_left: int, y_bottom_right: int, id_con: int = -1, lines: Optional[List[LineWithMeta]] = None, - is_attribute: bool = False, is_attribute_required: bool = False, rotated_angle: int = 0, uid: str = None, - contour_coord: Optional[BBox] = None) -> None: + self.bbox.shift(shift_x=shift_x, shift_y=shift_y) + if self.contour_coord: + self.contour_coord.shift(shift_x=shift_x, shift_y=shift_y) - import uuid + def __init__(self, bbox: BBox, id_con: int = -1, lines: Optional[List[LineWithMeta]] = None, + is_attribute: bool = False, is_attribute_required: bool = False, rotated_angle: int = 0, uid: Optional[str] = None, + contour_coord: Optional[BBox] = None, colspan: int = 1, rowspan: int = 1, invisible: bool = 
False) -> None: - assert x_top_left <= x_bottom_right - assert y_top_left <= y_bottom_right + import uuid - self.lines = [] if lines is None else lines - super().__init__(lines) + super().__init__(lines=lines, colspan=colspan, rowspan=rowspan, invisible=invisible) - self.x_top_left = x_top_left - self.x_bottom_right = x_bottom_right - self.y_top_left = y_top_left - self.y_bottom_right = y_bottom_right + self.bbox = bbox self.id_con = id_con self.is_attribute = is_attribute self.is_attribute_required = is_attribute_required self.rotated_angle = rotated_angle - self.cell_uid = f"cell_{uuid.uuid1()}" if uid is None else uid - self.con_coord = contour_coord or BBox(0, 0, 0, 0) - - def __str__(self) -> str: - return f"Cell((cs={self.colspan}, rs={self.rowspan}, {self.get_text()})" - - def get_text(self) -> str: - return "\n".join([line.line for line in self.lines]) - - def get_annotations(self) -> List[Annotation]: - return LineWithMeta.join(self.lines, delimiter="\n").annotations + self.uuid = uuid.uuid4() if uuid is None else uid + self.contour_coord = contour_coord or BBox(0, 0, 0, 0) def change_lines_boxes_page_width_height(self, new_page_width: int, new_page_height: int) -> None: from dedoc.data_structures.concrete_annotations.bbox_annotation import BBoxAnnotation @@ -96,11 +64,3 @@ def change_lines_boxes_page_width_height(self, new_page_width: int, new_page_hei def __repr__(self) -> str: return self.__str__() - - @property - def width(self) -> int: - return self.x_bottom_right - self.x_top_left - - @property - def height(self) -> int: - return self.y_bottom_right - self.y_top_left diff --git a/dedoc/readers/pdf_reader/data_classes/tables/scantable.py b/dedoc/readers/pdf_reader/data_classes/tables/scantable.py index be812630..9ae91c18 100644 --- a/dedoc/readers/pdf_reader/data_classes/tables/scantable.py +++ b/dedoc/readers/pdf_reader/data_classes/tables/scantable.py @@ -1,4 +1,4 @@ -from typing import Any, List, Optional +from typing import List from dedocutils.data_structures import BBox @@ -9,93 +9,32 @@ from dedoc.readers.pdf_reader.data_classes.tables.location import Location -class ScanTable: - def __init__(self, page_number: int, matrix_cells: Optional[List[List[CellWithMeta]]] = None, bbox: Optional[BBox] = None, - name: str = "", order: int = -1) -> None: - self.matrix_cells = matrix_cells - self.page_number = page_number - self.locations = [] - self.name = name +class ScanTable(Table): + def __init__(self, page_number: int, cells: List[List[CellWithMeta]], bbox: BBox, order: int = -1) -> None: + + super().__init__(cells, TableMetadata(page_id=page_number)) self.order = order - if bbox is not None: - self.locations.append(Location(page_number, bbox)) + self.locations = [Location(page_number, bbox)] def extended(self, table: "ScanTable") -> None: # extend locations self.locations.extend(table.locations) # extend values - self.matrix_cells.extend(table.matrix_cells) + self.cells.extend(table.cells) # extend order self.order = max(self.order, table.order) def check_on_cell_instance(self) -> bool: - if len(self.matrix_cells) == 0: + if len(self.cells) == 0: return False - if len(self.matrix_cells[0]) == 0: + if len(self.cells[0]) == 0: return False - if not isinstance(self.matrix_cells[0][0], Cell): + if not isinstance(self.cells[0][0], Cell): return False return True - def to_table(self) -> Table: - metadata = TableMetadata(page_id=self.page_number, uid=self.name, rotated_angle=self.location.rotated_angle) - cells_with_meta = [[CellWithMeta.create_from_cell(cell) for cell in row] for 
row in self.matrix_cells] - return Table(metadata=metadata, cells=cells_with_meta) - - @staticmethod - def get_cells_text(attr_cells: List[List[Cell]]) -> List[List[str]]: - attrs = [] - for i in range(0, len(attr_cells)): - attrs.append([a.get_text() for a in attr_cells[i]]) - - return attrs - - @staticmethod - def get_key_value_attrs(attrs: List, val: Any) -> dict: # noqa - res_attrs = [] - for i in range(0, len(attrs)): - res_attrs.append({"attr": attrs[i]}) - res = { - "attrs": res_attrs, - "val": val - } - return res - - @staticmethod - def get_index_of_end_string_attr(matrix_cells: List[List[Cell]]) -> int: - end_attr_string = 0 - for i in range(0, len(matrix_cells)): - if matrix_cells[i][0].is_attribute: - end_attr_string = i - - return end_attr_string - - @staticmethod - def get_attributes_cell(matrix_cells: List[List[Cell]]) -> (List[int], List[List[Cell]], int): - import copy - import numpy as np - - required_columns = [] - for j in range(0, len(matrix_cells[0])): - if matrix_cells[0][j].is_attribute_required: - required_columns.append(j) - - end_attr_string = ScanTable.get_index_of_end_string_attr(matrix_cells) - - attrs = copy.deepcopy(np.array(matrix_cells[0:end_attr_string + 1])) - attrs = attrs.transpose().tolist() - - return [required_columns, attrs, end_attr_string] - - @staticmethod - def get_matrix_attrs_and_data(matrix_cells: List[List[Cell]]) -> (List[List[Cell]], List[List[str]], List[List[str]]): - required_columns, attrs, end_attr_string = ScanTable.get_attributes_cell(matrix_cells) - attrs_text = ScanTable.get_cells_text(attrs) - - data = matrix_cells[(end_attr_string + 1):] - data_text = ScanTable.get_cells_text(data) - - return [attrs, attrs_text, data_text] + def __get_cells_text(self, cells: List[List[CellWithMeta]]) -> List[List[str]]: + return [[cell.get_text() for cell in row] for row in cells] @property def location(self) -> Location: @@ -103,12 +42,12 @@ def location(self) -> Location: @property def uid(self) -> str: - return self.name + return self.metadata.uid def to_dict(self) -> dict: from collections import OrderedDict - data_text = ScanTable.get_cells_text(self.matrix_cells) + data_text = self.__get_cells_text(self.cells) res = OrderedDict() res["locations"] = [location.to_dict() for location in self.locations] diff --git a/dedoc/readers/pdf_reader/pdf_base_reader.py b/dedoc/readers/pdf_reader/pdf_base_reader.py index 4fd9fdec..3a6e29ef 100644 --- a/dedoc/readers/pdf_reader/pdf_base_reader.py +++ b/dedoc/readers/pdf_reader/pdf_base_reader.py @@ -15,8 +15,6 @@ ParametersForParseDoc = namedtuple("ParametersForParseDoc", [ - "orient_analysis_cells", - "orient_cell_angle", "is_one_column_document", "document_orientation", "language", @@ -73,8 +71,6 @@ def read(self, file_path: str, parameters: Optional[dict] = None) -> Unstructure params_for_parse = ParametersForParseDoc( language=param_utils.get_param_language(parameters), - orient_analysis_cells=param_utils.get_param_orient_analysis_cells(parameters), - orient_cell_angle=param_utils.get_param_orient_cell_angle(parameters), is_one_column_document=param_utils.get_param_is_one_column_document(parameters), document_orientation=param_utils.get_param_document_orientation(parameters), need_header_footers_analysis=param_utils.get_param_need_header_footers_analysis(parameters), @@ -91,12 +87,11 @@ def read(self, file_path: str, parameters: Optional[dict] = None) -> Unstructure ) lines, scan_tables, attachments, warnings, metadata = self._parse_document(file_path, params_for_parse) - tables = 
[scan_table.to_table() for scan_table in scan_tables] if params_for_parse.with_attachments and self.attachment_extractor.can_extract(file_path): attachments += self.attachment_extractor.extract(file_path=file_path, parameters=parameters) - result = UnstructuredDocument(lines=lines, tables=tables, attachments=attachments, warnings=warnings, metadata=metadata) + result = UnstructuredDocument(lines=lines, tables=scan_tables, attachments=attachments, warnings=warnings, metadata=metadata) return self._postprocess(result) def _parse_document(self, path: str, parameters: ParametersForParseDoc) -> ( @@ -177,7 +172,7 @@ def _shift_all_contents(self, lines: List[LineWithMeta], unref_tables: List[Scan table_page_number = location.page_number location.shift(shift_x=gost_analyzed_images[table_page_number][1].x_top_left, shift_y=gost_analyzed_images[table_page_number][1].y_top_left) page_number = scan_table.locations[0].page_number - for row in scan_table.matrix_cells: + for row in scan_table.cells: for cell in row: image_width, image_height = gost_analyzed_images[page_number][2][1], gost_analyzed_images[page_number][2][0] shift_x, shift_y = (gost_analyzed_images[page_number][1].x_top_left, gost_analyzed_images[page_number][1].y_top_left) @@ -275,16 +270,3 @@ def _binarization(self, gray_image: ndarray) -> ndarray: binary_mask = gray_image >= np.quantile(gray_image, 0.05) gray_image[binary_mask] = 255 return gray_image - - def eval_tables_by_batch(self, - batch: Iterator[ndarray], - page_number_begin: int, - language: str, - orient_analysis_cells: bool = False, - orient_cell_angle: int = 270, - table_type: str = "") -> Tuple[List[ndarray], List[ScanTable]]: - from joblib import Parallel, delayed - - result_batch = Parallel(n_jobs=self.config["n_jobs"])(delayed(self.table_recognizer.recognize_tables_from_image)( - image, page_number_begin + i, language, orient_analysis_cells, orient_cell_angle, table_type) for i, image in enumerate(batch)) - return result_batch diff --git a/dedoc/readers/pdf_reader/pdf_image_reader/pdf_image_reader.py b/dedoc/readers/pdf_reader/pdf_image_reader/pdf_image_reader.py index 64d96fe6..e53ba9e3 100644 --- a/dedoc/readers/pdf_reader/pdf_image_reader/pdf_image_reader.py +++ b/dedoc/readers/pdf_reader/pdf_image_reader/pdf_image_reader.py @@ -85,8 +85,6 @@ def _process_one_page(self, image=rotated_image, page_number=page_number, language=parameters.language, - orient_analysis_cells=parameters.orient_analysis_cells, - orient_cell_angle=parameters.orient_cell_angle, table_type=parameters.table_type ) else: diff --git a/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/cell_splitter.py b/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/cell_splitter.py index 0e72128c..ab1c355d 100644 --- a/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/cell_splitter.py +++ b/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/cell_splitter.py @@ -1,6 +1,7 @@ from typing import Dict, List, Optional, Tuple import numpy as np +from dedocutils.data_structures import BBox from dedoc.readers.pdf_reader.data_classes.tables.cell import Cell from dedoc.utils.utils import flatten @@ -55,25 +56,26 @@ def split(self, cells: List[List[Cell]]) -> List[List[Cell]]: for row_id, row in enumerate(result_matrix): for col_id, cell in enumerate(row): if cell is None: - result_matrix[row_id][col_id] = Cell(x_top_left=horizontal_borders[row_id], - x_bottom_right=horizontal_borders[row_id + 1], - y_top_left=vertical_borders[col_id], - y_bottom_right=vertical_borders[col_id + 1]) + bbox 
= BBox(x_top_left=int(horizontal_borders[row_id]), + y_top_left=int(vertical_borders[col_id]), + width=int(horizontal_borders[row_id + 1] - horizontal_borders[row_id]), + height=int(vertical_borders[col_id + 1] - vertical_borders[col_id])) + result_matrix[row_id][col_id] = Cell(bbox=bbox) return result_matrix @staticmethod def __split_one_cell(cell: Cell, horizontal_borders: np.ndarray, vertical_borders: np.ndarray, result_matrix: List[List[Cell]]) -> None: - left_id, right_id = np.searchsorted(vertical_borders, [cell.x_top_left, cell.x_bottom_right]) - top_id, bottom_id = np.searchsorted(horizontal_borders, [cell.y_top_left, cell.y_bottom_right]) + left_id, right_id = np.searchsorted(vertical_borders, [cell.bbox.x_top_left, cell.bbox.x_bottom_right]) + top_id, bottom_id = np.searchsorted(horizontal_borders, [cell.bbox.y_top_left, cell.bbox.y_bottom_right]) colspan = right_id - left_id rowspan = bottom_id - top_id for row_id in range(top_id, bottom_id): for column_id in range(left_id, right_id): - new_cell = Cell.copy_from(cell, - x_top_left=vertical_borders[column_id], - x_bottom_right=vertical_borders[column_id + 1], - y_top_left=horizontal_borders[row_id], - y_bottom_right=horizontal_borders[row_id + 1]) + bbox = BBox(x_top_left=int(vertical_borders[column_id]), + y_top_left=int(horizontal_borders[row_id]), + width=int(vertical_borders[column_id + 1] - vertical_borders[column_id]), + height=int(horizontal_borders[row_id + 1] - horizontal_borders[row_id])) + new_cell = Cell.copy_from(cell, bbox) new_cell.invisible = True result_matrix[row_id][column_id] = new_cell @@ -106,20 +108,21 @@ def _merge_close_borders(self, cells: List[List[Cell]]) -> List[List[Cell]]: @return: cells with merged borders """ horizontal_borders, vertical_borders = self.__get_borders(cells) - eps_vertical = self.eps * min((cell.width for cell in flatten(cells)), default=0) - eps_horizontal = self.eps * min((cell.height for cell in flatten(cells)), default=0) + eps_vertical = self.eps * min((cell.bbox.width for cell in flatten(cells)), default=0) + eps_horizontal = self.eps * min((cell.bbox.height for cell in flatten(cells)), default=0) horizontal_dict = self.__get_border_dict(borders=horizontal_borders, threshold=eps_horizontal) vertical_dict = self.__get_border_dict(borders=vertical_borders, threshold=eps_vertical) result = [] for row in cells: new_row = [] for cell in row: - x_top_left = vertical_dict[cell.x_top_left] - x_bottom_right = vertical_dict[cell.x_bottom_right] - y_top_left = horizontal_dict[cell.y_top_left] - y_bottom_right = horizontal_dict[cell.y_bottom_right] + x_top_left = vertical_dict[cell.bbox.x_top_left] + x_bottom_right = vertical_dict[cell.bbox.x_bottom_right] + y_top_left = horizontal_dict[cell.bbox.y_top_left] + y_bottom_right = horizontal_dict[cell.bbox.y_bottom_right] if y_top_left < y_bottom_right and x_top_left < x_bottom_right: - new_cell = Cell.copy_from(cell, x_top_left=x_top_left, x_bottom_right=x_bottom_right, y_top_left=y_top_left, y_bottom_right=y_bottom_right) + bbox = BBox(x_top_left=x_top_left, y_top_left=y_top_left, width=x_bottom_right - x_top_left, height=y_bottom_right - y_top_left) + new_cell = Cell.copy_from(cell, bbox) new_row.append(new_cell) result.append(new_row) return result @@ -130,8 +133,8 @@ def __get_borders(cells: List[List[Cell]]) -> Tuple[List[int], List[int]]: vertical_borders = [] for row in cells: for cell in row: - horizontal_borders.append(cell.y_top_left) - horizontal_borders.append(cell.y_bottom_right) - vertical_borders.append(cell.x_top_left) - 
vertical_borders.append(cell.x_bottom_right) + horizontal_borders.append(cell.bbox.y_top_left) + horizontal_borders.append(cell.bbox.y_bottom_right) + vertical_borders.append(cell.bbox.x_top_left) + vertical_borders.append(cell.bbox.x_bottom_right) return horizontal_borders, vertical_borders diff --git a/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/split_last_hor_union_cells.py b/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/split_last_hor_union_cells.py index 0b14f034..8dd0bbac 100644 --- a/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/split_last_hor_union_cells.py +++ b/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/split_last_hor_union_cells.py @@ -127,11 +127,11 @@ def _split_row(cell_splitter: Cell, union_cell: List[Cell], language: str, image # Get width of all union cell eps = len(union_cell) - x_left = union_cell[0].x_top_left + eps - x_right = union_cell[-1].x_bottom_right + x_left = union_cell[0].bbox.x_top_left + eps + x_right = union_cell[-1].bbox.x_bottom_right # get y coordinate from cell before union cell - y_top_split = cell_splitter.con_coord.y_top_left - y_bottom_split = cell_splitter.con_coord.y_top_left + cell_splitter.con_coord.height + y_top_split = cell_splitter.contour_coord.y_top_left + y_bottom_split = cell_splitter.contour_coord.y_top_left + cell_splitter.contour_coord.height if abs(y_bottom_split - y_top_split) < 10: for cell in union_cell: cell.lines = [] @@ -141,8 +141,8 @@ def _split_row(cell_splitter: Cell, union_cell: List[Cell], language: str, image col_id = len(union_cell) - 1 result_row = copy.deepcopy(union_cell) while col_id >= 0: - union_cell[col_id].y_top_left = y_top_split - union_cell[col_id].y_bottom_right = y_bottom_split + union_cell[col_id].bbox.y_top_left = y_top_split + union_cell[col_id].bbox.height = y_bottom_split - union_cell[col_id].bbox.y_top_left cell_image, padding_value = OCRCellExtractor.upscale(image[y_top_split:y_bottom_split, x_left:x_right]) result_row[col_id].lines = __get_ocr_lines(cell_image, language, page_image=image, @@ -162,11 +162,8 @@ def __get_ocr_lines(cell_image: np.ndarray, language: str, page_image: np.ndarra for line in list(ocr_result.lines): text_line = OCRCellExtractor.get_line_with_meta("") for word in line.words: - # do absolute coordinate on src_image (inside src_image) - word.bbox.y_top_left -= padding_cell_value - word.bbox.x_top_left -= padding_cell_value - word.bbox.y_top_left += cell_bbox.y_top_left - word.bbox.x_top_left += cell_bbox.x_top_left + # do absolute coordinates on src_image (inside src_image) + word.bbox.shift(shift_x=cell_bbox.x_top_left - padding_cell_value, shift_y=cell_bbox.y_top_left - padding_cell_value) # add space between words if len(text_line) != 0: diff --git a/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_extractors/concrete_extractors/multipage_table_extractor.py b/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_extractors/concrete_extractors/multipage_table_extractor.py index 06abe0c2..8d74829d 100644 --- a/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_extractors/concrete_extractors/multipage_table_extractor.py +++ b/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_extractors/concrete_extractors/multipage_table_extractor.py @@ -7,7 +7,7 @@ from dedoc.readers.pdf_reader.data_classes.tables.cell import Cell from dedoc.readers.pdf_reader.data_classes.tables.scantable import ScanTable from 
dedoc.readers.pdf_reader.pdf_image_reader.table_recognizer.table_extractors.base_table_extractor import BaseTableExtractor -from dedoc.readers.pdf_reader.pdf_image_reader.table_recognizer.table_extractors.concrete_extractors.table_attribute_extractor import TableAttributeExtractor +from dedoc.readers.pdf_reader.pdf_image_reader.table_recognizer.table_extractors.concrete_extractors.table_attribute_extractor import TableHeaderExtractor from dedoc.readers.pdf_reader.pdf_image_reader.table_recognizer.table_utils.utils import equal_with_eps @@ -21,11 +21,11 @@ def extract_multipage_tables(self, single_tables: List[ScanTable], lines_with_me self.single_tables = single_tables multipages_tables = [] list_page_with_tables = [] - total_pages = max((table.page_number + 1 for table in single_tables), default=0) + total_pages = max((table.location.page_number + 1 for table in single_tables), default=0) for cur_page in range(total_pages): # 1. get possible diapason of neighbors pages with tables # pages distribution - list_mp_table = [t for t in self.single_tables if t.page_number == cur_page] + list_mp_table = [t for t in self.single_tables if t.location.page_number == cur_page] list_page_with_tables.append(list_mp_table) total_cur_page = 0 @@ -86,7 +86,7 @@ def __handle_multipage_table(self, # t2 is merged with t1 t1.extended(t2) list_page_with_tables[cur_page].pop(0) - self.__delete_ref_table(lines=lines_with_meta, table_name=t2.name) + self.__delete_ref_table(lines=lines_with_meta, table_name=t2.uid) else: if len(list_page_with_tables[cur_page]) > 0: cur_page -= 1 # analysis from the current page, not the next one @@ -117,12 +117,12 @@ def __get_width_cell_wo_separating(row: List[Cell]) -> List[int]: end = None for cell_id, cell in enumerate(row): if prev_uid is None: - start = cell.x_top_left - prev_uid = cell.cell_uid - elif prev_uid != cell.cell_uid: + start = cell.bbox.x_top_left + prev_uid = cell.uuid + elif prev_uid != cell.uuid: widths.append(end - start) - start = cell.x_top_left - end = cell.x_bottom_right + start = cell.bbox.x_top_left + end = cell.bbox.x_bottom_right if cell_id == len(row) - 1: widths.append(end - start) return widths @@ -154,28 +154,28 @@ def __is_one_table(self, t1: ScanTable, t2: ScanTable) -> bool: return False # condition 2. Exclusion of the duplicated header (if any) - attr1 = TableAttributeExtractor.get_header_table(t1.matrix_cells) - attr2 = TableAttributeExtractor.get_header_table(t2.matrix_cells) + attr1 = TableHeaderExtractor.get_header_table(t1.cells) + attr2 = TableHeaderExtractor.get_header_table(t2.cells) t2_update = copy.deepcopy(t2) - if TableAttributeExtractor.is_equal_attributes(attr1, attr2): - t2_update.matrix_cells = t2_update.matrix_cells[len(attr2):] + if TableHeaderExtractor.is_equal_header(attr1, attr2): + t2_update.cells = t2_update.cells[len(attr2):] - if len(t2_update.matrix_cells) == 0 or len(t1.matrix_cells) == 0: + if len(t2_update.cells) == 0 or len(t1.cells) == 0: return False - TableAttributeExtractor.clear_attributes(t2_update.matrix_cells) + TableHeaderExtractor.clear_attributes(t2_update.cells) # condition 3. Number of columns should be equal - if len(t1.matrix_cells[-1]) != len(t2_update.matrix_cells[0]): + if len(t1.cells[-1]) != len(t2_update.cells[0]): if self.config.get("debug_mode", False): self.logger.debug("Different count column") return False # condition 4. 
Comparison of the widths of last and first rows - if t1.check_on_cell_instance() and t2_update.check_on_cell_instance() and not self.__is_equal_width_cells(t1.matrix_cells, t2_update.matrix_cells): + if t1.check_on_cell_instance() and t2_update.check_on_cell_instance() and not self.__is_equal_width_cells(t1.cells, t2_update.cells): if self.config.get("debug_mode", False): self.logger.debug("Different width columns") return False - t2.matrix_cells = copy.deepcopy(t2_update.matrix_cells) # save changes + t2.cells = copy.deepcopy(t2_update.cells) # save changes return True diff --git a/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_extractors/concrete_extractors/onepage_table_extractor.py b/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_extractors/concrete_extractors/onepage_table_extractor.py index c946cccf..c676b3da 100644 --- a/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_extractors/concrete_extractors/onepage_table_extractor.py +++ b/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_extractors/concrete_extractors/onepage_table_extractor.py @@ -1,10 +1,10 @@ import copy import logging -import uuid from typing import List import numpy as np +from dedoc.common.exceptions.recognize_error import RecognizeError from dedoc.readers.pdf_reader.data_classes.tables.cell import Cell from dedoc.readers.pdf_reader.data_classes.tables.scantable import ScanTable from dedoc.readers.pdf_reader.data_classes.tables.table_tree import TableTree @@ -12,7 +12,7 @@ from dedoc.readers.pdf_reader.pdf_image_reader.table_recognizer.cell_splitter import CellSplitter from dedoc.readers.pdf_reader.pdf_image_reader.table_recognizer.split_last_hor_union_cells import split_last_column from dedoc.readers.pdf_reader.pdf_image_reader.table_recognizer.table_extractors.base_table_extractor import BaseTableExtractor -from dedoc.readers.pdf_reader.pdf_image_reader.table_recognizer.table_extractors.concrete_extractors.table_attribute_extractor import TableAttributeExtractor +from dedoc.readers.pdf_reader.pdf_image_reader.table_recognizer.table_extractors.concrete_extractors.table_attribute_extractor import TableHeaderExtractor from dedoc.readers.pdf_reader.pdf_image_reader.table_recognizer.table_utils.img_processing import detect_tables_by_contours @@ -23,26 +23,18 @@ def __init__(self, *, config: dict, logger: logging.Logger) -> None: self.image = None self.page_number = 0 - self.attribute_selector = TableAttributeExtractor(logger=self.logger) + self.table_header_extractor = TableHeaderExtractor(logger=self.logger) self.count_vertical_extended = 0 self.splitter = CellSplitter() self.table_options = TableTypeAdditionalOptions() self.language = "rus" - def extract_onepage_tables_from_image(self, - image: np.ndarray, - page_number: int, - language: str, - orient_analysis_cells: bool, - orient_cell_angle: int, # TODO remove - table_type: str) -> List[ScanTable]: + def extract_onepage_tables_from_image(self, image: np.ndarray, page_number: int, language: str, table_type: str) -> List[ScanTable]: """ extracts tables from input image :param image: input gray image :param page_number: :param language: language for Tesseract - :param orient_analysis_cells: need or not analyse orientations of cells - :param orient_cell_angle: angle of cells (needs if orient_analysis_cells==True) :return: List[ScanTable] """ self.image = image @@ -50,73 +42,14 @@ def extract_onepage_tables_from_image(self, self.language = language # Read the image - tables_tree, contours, angle_rotate = 
detect_tables_by_contours(image, - language=language, - config=self.config, - orient_analysis_cells=orient_analysis_cells, - table_type=table_type) - + tables_tree, contours, angle_rotate = detect_tables_by_contours(image, language=language, config=self.config, table_type=table_type) tables = self.__build_structure_table_from_tree(tables_tree=tables_tree, table_type=table_type) - for matrix in tables: - for location in matrix.locations: + for table in tables: + for location in table.locations: location.bbox.rotate_coordinates(angle_rotate=-angle_rotate, image_shape=image.shape) location.rotated_angle = angle_rotate - tables = self.__select_attributes_matrix_tables(tables=tables) - - return tables - - """ TODO fix in the future (REMOVE) - def __detect_diff_orient(self, cell_text: str) -> bool: - # 1 - разбиваем на строки длины которых состоят хотя бы из одного символа - parts = cell_text.split("\n") - parts = [p for p in parts if len(p) > 0] - - # 2 - подсчитываем среднюю длину строк ячейки - len_parts = [len(p) for p in parts] - avg_len_part = np.average(len_parts) - - # Эвристика: считаем что ячейка повернута, если у нас большое количество строк и строки короткие - if len(parts) > TableTree.minimal_cell_cnt_line \ - and avg_len_part < TableTree.minimal_cell_avg_length_line: - return True - return False - - def __correct_orient_cell(self, cell: Cell, language: str, rotated_angle: int) -> [Cell, np.ndarray]: - img_cell = self.image[cell.y_top_left: cell.y_bottom_right, cell.x_top_left: cell.x_bottom_right] - rotated_image_cell = rotate_image(img_cell, -rotated_angle) - - output_dict = get_text_with_bbox_from_cells(img_cell, language=language) - line_boxes = [ - TextWithBBox(text=line.text, page_num=page_num, bbox=line.bbox, line_num=line_num, annotations=line.get_annotations(width, height)) - for line_num, line in enumerate(output_dict.lines)] - # get_cell_text_by_ocr(rotated_image_cell, language=language) - cell.set_rotated_angle(rotated_angle=-rotated_angle) - return cell, rotated_image_cell - - - def __analyze_header_cell_with_diff_orient(self, tables: List[ScanTable], language: str, - rotated_angle: int) -> List[ScanTable]: - - for table in tables: - attrs = TableAttributeExtractor.get_header_table(table.matrix_cells) - for i, row in enumerate(attrs): - for j, attr in enumerate(row): - if self.__detect_diff_orient(attr.text): - rotated_cell, rotated_image = self.__correct_orient_cell(attr, language=language, rotated_angle=rotated_angle) - table.matrix_cells[i][j] = rotated_cell - - return tables - """ - - def __select_attributes_matrix_tables(self, tables: List[ScanTable]) -> List[ScanTable]: - for matrix in tables: - matrix = self.attribute_selector.select_attributes(matrix) - - if self.config.get("debug_mode", False): - self._print_table_attr(matrix.matrix_cells) - return tables def __get_matrix_table_from_tree(self, table_tree: TableTree) -> ScanTable: @@ -127,26 +60,20 @@ def __get_matrix_table_from_tree(self, table_tree: TableTree) -> ScanTable: matrix = [] line = [] for cell in table_tree.children: - if len(line) != 0 and abs(cell.cell_box.y_top_left - line[-1].y_top_left) > 15: # add eps + if len(line) != 0 and abs(cell.cell_box.y_top_left - line[-1].bbox.y_top_left) > 15: # add eps cpy_line = copy.deepcopy(line) matrix.append(cpy_line) line.clear() - cell_ = Cell(x_top_left=cell.cell_box.x_top_left, - x_bottom_right=cell.cell_box.x_bottom_right, - y_top_left=cell.cell_box.y_top_left, - y_bottom_right=cell.cell_box.y_bottom_right, - id_con=cell.id_contours, - lines=cell.lines, - 
contour_coord=cell.cell_box) + cell_ = Cell(bbox=cell.cell_box, id_con=cell.id_contours, lines=cell.lines, contour_coord=cell.cell_box) line.append(cell_) matrix.append(line) # sorting column in each row - for i in range(0, len(matrix)): - matrix[i] = sorted(matrix[i], key=lambda cell: cell.x_top_left, reverse=False) + for i, row in enumerate(matrix): + matrix[i] = sorted(row, key=lambda cell: cell.bbox.x_top_left, reverse=False) - matrix_table = ScanTable(matrix_cells=matrix, bbox=table_tree.cell_box, page_number=self.page_number, name=str(uuid.uuid4())) + matrix_table = ScanTable(cells=matrix, bbox=table_tree.cell_box, page_number=self.page_number) return matrix_table @@ -157,19 +84,33 @@ def __build_structure_table_from_tree(self, tables_tree: TableTree, table_type: tables = [] for table_tree in tables_tree.children: try: - cur_table = self.__get_matrix_table_from_tree(table_tree) - # Эвристика 1: Таблица должна состоять из 1 строк и более - if len(cur_table.matrix_cells) > 0: - cur_table.matrix_cells = self.splitter.split(cells=cur_table.matrix_cells) - - # Эвристика 2: таблица должна иметь больше одного столбца - if len(cur_table.matrix_cells[0]) > 1 or (self.table_options.detect_one_cell_table in table_type and cur_table.matrix_cells[0] != []): - tables.append(cur_table) - - if self.table_options.split_last_column in table_type: - cur_table.matrix_cells = split_last_column(cur_table.matrix_cells, language=self.language, image=self.image) + table = self.__get_matrix_table_from_tree(table_tree) + table.cells = self.handle_cells(table.cells, table_type) + tables.append(table) except Exception as ex: self.logger.warning(f"Warning: unrecognized table into page {self.page_number}. {ex}") if self.config.get("debug_mode", False): raise ex return tables + + def handle_cells(self, cells: List[List[Cell]], table_type: str = "") -> List[List[Cell]]: + # Эвристика 1: Таблица должна состоять из 1 строк и более + if len(cells) < 1: + raise RecognizeError("Invalid recognized table") + + cells = self.splitter.split(cells=cells) + + # Эвристика 2: таблица должна иметь больше одного столбца + if cells[0] == [] or (len(cells[0]) <= 1 and self.table_options.detect_one_cell_table not in table_type): + raise RecognizeError("Invalid recognized table") + + # Postprocess table + if self.table_options.split_last_column in table_type: + cells = split_last_column(cells, language=self.language, image=self.image) + + self.table_header_extractor.set_header_cells(cells) + + if self.config.get("debug_mode", False): + self._print_table_attr(cells) + + return cells diff --git a/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_extractors/concrete_extractors/table_attribute_extractor.py b/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_extractors/concrete_extractors/table_attribute_extractor.py index f13f0eec..3dfca0e1 100644 --- a/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_extractors/concrete_extractors/table_attribute_extractor.py +++ b/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_extractors/concrete_extractors/table_attribute_extractor.py @@ -2,31 +2,31 @@ from typing import List from dedoc.readers.pdf_reader.data_classes.tables.cell import Cell -from dedoc.readers.pdf_reader.data_classes.tables.scantable import ScanTable from dedoc.readers.pdf_reader.pdf_image_reader.table_recognizer.table_utils.utils import similarity -class TableAttributeExtractor(object): +class TableHeaderExtractor: """ - Class finds and labels "is_attributes=True" 
attribute cells into ScanTable + Class finds and labels "is_attributes=True" attribute (header) cells into ScanTable + """ def __init__(self, logger: logging.Logger) -> None: self.logger = logger - def select_attributes(self, scan_table: ScanTable) -> ScanTable: - return self.__set_attributes_for_type_top(scan_table) + def set_header_cells(self, cells: List[List[Cell]]) -> None: + self.__set_attributes_for_type_top(cells) @staticmethod - def is_equal_attributes(attr1: List[List[Cell]], attr2: List[List[Cell]], thr_similarity: int = 0.8) -> bool: - if len(attr1) != len(attr2): + def is_equal_header(header_1: List[List[Cell]], header_2: List[List[Cell]], thr_similarity: int = 0.8) -> bool: + if len(header_1) != len(header_2): return False - for i in range(len(attr1)): - if len(attr1[i]) != len(attr2[i]): + for i in range(len(header_1)): + if len(header_1[i]) != len(header_2[i]): return False - for j in range(len(attr1[i])): - if similarity(attr1[i][j].get_text(), attr2[i][j].get_text()) < thr_similarity: + for j in range(len(header_1[i])): + if similarity(header_1[i][j].get_text(), header_2[i][j].get_text()) < thr_similarity: return False return True @@ -44,7 +44,7 @@ def check_have_attributes(matrix_table: List[List[Cell]]) -> bool: @staticmethod def get_header_table(matrix_table: List[List[Cell]]) -> List[List[Cell]]: - if not TableAttributeExtractor.check_have_attributes(matrix_table): + if not TableHeaderExtractor.check_have_attributes(matrix_table): return matrix_table[:1] header_rows = len(matrix_table) @@ -58,7 +58,7 @@ def get_header_table(matrix_table: List[List[Cell]]) -> List[List[Cell]]: @staticmethod def clear_attributes(matrix_table: List[List[Cell]]) -> None: - if not TableAttributeExtractor.check_have_attributes(matrix_table): + if not TableHeaderExtractor.check_have_attributes(matrix_table): return for row in matrix_table: @@ -66,114 +66,87 @@ def clear_attributes(matrix_table: List[List[Cell]]) -> None: cell.is_attribute = False cell.is_attribute_required = False - def __is_indexable_column(self, matrix_table: List[List[Cell]], column_id: int, max_raw_of_search: int) -> bool: + def __is_indexable_column(self, matrix_table: List[List[Cell]], column_id: int, max_row_of_search: int) -> bool: # № п/п - for i in range(0, max_raw_of_search + 1): - if column_id < len(matrix_table[i]) and "№" in matrix_table[i][column_id].get_text() and len( - matrix_table[i][column_id].get_text()) < len("№ п/п\n"): + for row in matrix_table[:max_row_of_search + 1]: + if column_id < len(row) and "№" in row[column_id].get_text() and len(row[column_id].get_text()) < len("№ п/п\n"): return True return False - def __set_attributes_for_type_top(self, scan_table: ScanTable) -> ScanTable: - vertical_union_columns = self.__analyze_attr_for_vertical_union_columns(scan_table) - horizontal_union_rows = self.__analyze_attr_for_horizontal_union_raws(scan_table) + def __set_attributes_for_type_top(self, cells: List[List[Cell]]) -> List[List[Cell]]: + horizontal_union_rows = self.__analyze_attr_for_horizontal_union_raws(cells) - # simple table - if (0 not in horizontal_union_rows) and len(vertical_union_columns) == 0: - self.__analyze_attr_for_simple_table(scan_table) + if 0 not in horizontal_union_rows: + self.__analyze_attr_for_simple_table(cells) - return scan_table + return cells def __is_empty_column(self, matrix_table: List[List[Cell]], column_id: int) -> bool: - all_empty = True - for i in range(0, len(matrix_table)): - if len(matrix_table[i]) <= column_id: - break - if 
matrix_table[i][column_id].get_text() != "": - all_empty = False - break - return all_empty + for row in matrix_table: + if len(row) <= column_id: + return True + if row[column_id].get_text() != "": + return False + return True def __is_empty_row(self, matrix_table: List[List[Cell]], row_index: int) -> bool: - all_empty = True - for j in range(0, len(matrix_table[row_index])): - if matrix_table[row_index][j].get_text() != "": - all_empty = False - break - return all_empty - - def __analyze_attr_for_vertical_union_columns(self, scan_table: ScanTable) -> List[int]: - vertical_union_columns = [] - if len(vertical_union_columns) != 0 and len(scan_table.matrix_cells) > 1: - self.logger.debug("ATTR_TYPE: vertical union table") - row_max_attr = 1 - i = 1 - - # Установка атрибутов таблицы - for i in range(0, row_max_attr): - for j in range(0, len(scan_table.matrix_cells[i])): - scan_table.matrix_cells[i][j].is_attribute = True - # Установка обязательных атрибутов - scan_table.matrix_cells[0][0].is_attribute_required = True - for j in range(1, len(scan_table.matrix_cells[0])): - is_attribute_required = True - if is_attribute_required: - scan_table.matrix_cells[0][j].is_attribute_required = True - - return vertical_union_columns - - def __analyze_attr_for_horizontal_union_raws(self, scan_table: ScanTable) -> List[int]: + + for cell in matrix_table[row_index]: + if cell.get_text() != "": + return False + return True + + def __analyze_attr_for_horizontal_union_raws(self, cells: List[List[Cell]]) -> List[int]: horizontal_union_rows = [] union_first = False - for i in range(0, len(scan_table.matrix_cells)): + for i in range(len(cells)): if len(horizontal_union_rows) > 0 and i not in horizontal_union_rows: horizontal_union_rows.append(i) - if not self.__is_empty_row(scan_table.matrix_cells, i): + if not self.__is_empty_row(cells, i): break if union_first and len(horizontal_union_rows) != 0: self.logger.debug("ATTR_TYPE: horizontal_union_rows") - for i in range(0, len(horizontal_union_rows)): - for j in range(0, len(scan_table.matrix_cells[i])): - scan_table.matrix_cells[i][j].is_attribute = True - scan_table.matrix_cells[0][0].is_attribute_required = True + for i in range(len(horizontal_union_rows)): + for j in range(len(cells[i])): + cells[i][j].is_attribute = True + cells[0][0].is_attribute_required = True first_required_column = 0 # search indexable_column # один один столбец должен быть (0) - нумерованным, # один (1) - с обязательными поляями, один (2) - с необязательными # поэтому len(matrix_table) > first_required_column + 2 if len(horizontal_union_rows) > 0 and \ - self.__is_indexable_column(scan_table.matrix_cells, first_required_column, max_raw_of_search=horizontal_union_rows[-1]) \ - and len(scan_table.matrix_cells) > first_required_column + 2: - scan_table.matrix_cells[0][first_required_column + 1].is_attribute_required = True + self.__is_indexable_column(cells, first_required_column, max_raw_of_search=horizontal_union_rows[-1]) \ + and len(cells) > first_required_column + 2: + cells[0][first_required_column + 1].is_attribute_required = True # Полностью пустые строки не могут быть атрибутами (не информативны) # Перенос атрибутов на след строку таблицы index_empty_rows = horizontal_union_rows[-1] - if self.__is_empty_row(scan_table.matrix_cells, index_empty_rows) and len(scan_table.matrix_cells) != index_empty_rows + 1: + if self.__is_empty_row(cells, index_empty_rows) and len(cells) != index_empty_rows + 1: horizontal_union_rows.append(index_empty_rows + 1) - for j in range(0, 
len(scan_table.matrix_cells[index_empty_rows + 1])): - scan_table.matrix_cells[index_empty_rows + 1][j].is_attribute = True + for j in range(0, len(cells[index_empty_rows + 1])): + cells[index_empty_rows + 1][j].is_attribute = True self.logger.debug("detect empty attributes row") return horizontal_union_rows - def __analyze_attr_for_simple_table(self, scan_table: ScanTable) -> None: + def __analyze_attr_for_simple_table(self, cells: List[List[Cell]]) -> None: self.logger.debug("ATTR_TYPE: simple table") - for j in range(0, len(scan_table.matrix_cells[0])): - scan_table.matrix_cells[0][j].is_attribute = True + for cell in cells[0]: + cell.is_attribute = True + # set first required column - j = 0 - first_required_column = j - while j < len(scan_table.matrix_cells[0]): - if not self.__is_empty_column(scan_table.matrix_cells, j): - scan_table.matrix_cells[0][j].is_attribute_required = True + first_required_column = 0 + for j in range(len(cells[0])): + if not self.__is_empty_column(cells, j): + cells[0][j].is_attribute_required = True first_required_column = j break - j += 1 # search indexable_column - # один один столбец должен быть (0) - нумерованным, - # один (1) - с обязательными поляями, один (2) - с необязательными + # один столбец должен быть (0) - нумерованным, + # один (1) - с обязательными полями, один (2) - с необязательными # поэтому len(matrix_table) > first_required_column + 2 - if self.__is_indexable_column(scan_table.matrix_cells, first_required_column, 0) and len(scan_table.matrix_cells) > first_required_column + 2: - scan_table.matrix_cells[0][first_required_column + 1].is_attribute_required = True + if self.__is_indexable_column(cells, first_required_column, 0) and len(cells) > first_required_column + 2: + cells[0][first_required_column + 1].is_attribute_required = True diff --git a/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_recognizer.py b/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_recognizer.py index c1124ca4..11c30cab 100644 --- a/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_recognizer.py +++ b/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_recognizer.py @@ -21,57 +21,36 @@ class TableRecognizer(object): def __init__(self, *, config: dict = None) -> None: - self.logger = config.get("logger", logging.getLogger()) - self.onepage_tables_extractor = OnePageTableExtractor(config=config, logger=self.logger) self.multipage_tables_extractor = MultiPageTableExtractor(config=config, logger=self.logger) self.config = config self.table_type = TableTypeAdditionalOptions() def convert_to_multipages_tables(self, all_single_tables: List[ScanTable], lines_with_meta: List[LineWithMeta]) -> List[ScanTable]: - multipage_tables = self.multipage_tables_extractor.extract_multipage_tables(single_tables=all_single_tables, lines_with_meta=lines_with_meta) return multipage_tables - def recognize_tables_from_image(self, - image: np.ndarray, - page_number: int, - language: str, - orient_analysis_cells: bool, - orient_cell_angle: int, - table_type: str = "") -> Tuple[np.ndarray, List[ScanTable]]: + def recognize_tables_from_image(self, image: np.ndarray, page_number: int, language: str, table_type: str = "") -> Tuple[np.ndarray, List[ScanTable]]: self.logger.debug(f"Page {page_number}") try: - cleaned_image, matrix_tables = self.__rec_tables_from_img(image, - page_num=page_number, - language=language, - orient_analysis_cells=orient_analysis_cells, - orient_cell_angle=orient_cell_angle, - table_type=table_type) - return 
cleaned_image, matrix_tables + cleaned_image, scan_tables = self.__rec_tables_from_img(image, page_num=page_number, language=language, table_type=table_type) + return cleaned_image, scan_tables except Exception as ex: logging.warning(ex) if self.config.get("debug_mode", False): raise ex return image, [] - def __rec_tables_from_img(self, - src_image: np.ndarray, - page_num: int, - language: str, - orient_analysis_cells: bool, - orient_cell_angle: int, - table_type: str) -> Tuple[np.ndarray, List[ScanTable]]: + def __rec_tables_from_img(self, src_image: np.ndarray, page_num: int, language: str, table_type: str) -> Tuple[np.ndarray, List[ScanTable]]: gray_image = cv2.cvtColor(src_image, cv2.COLOR_BGR2GRAY) if len(src_image.shape) == 3 else src_image single_page_tables = self.onepage_tables_extractor.extract_onepage_tables_from_image( image=gray_image, page_number=page_num, language=language, - orient_analysis_cells=orient_analysis_cells, - orient_cell_angle=orient_cell_angle, table_type=table_type) + if self.config.get("labeling_mode", False): self.__save_tables(tables=single_page_tables, image=src_image, table_path=self.config.get("table_path", "/tmp/tables")) if self.table_type.detect_one_cell_table in table_type: @@ -128,11 +107,8 @@ def __if_not_table(self, table: ScanTable, image: np.ndarray) -> bool: std = table_image.std() white_mean = (table_image > 225).mean() black_mean = (table_image < 225).mean() - table_area = bbox.width * bbox.height - cells_area = 0 - for row in table.matrix_cells: - for cell in row: - cells_area += cell.width * cell.height + table_area = bbox.square + cells_area = sum([cell.bbox.square for row in table.cells for cell in row]) ratio = cells_area / table_area res = (white_mean < 0.5) or (black_mean > 0.3) or (std < 30) or (mean < 150) or (mean < 200 and std < 80) or ratio < 0.65 diff --git a/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_utils/accuracy_table_rec.py b/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_utils/accuracy_table_rec.py deleted file mode 100644 index f18b7505..00000000 --- a/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_utils/accuracy_table_rec.py +++ /dev/null @@ -1,140 +0,0 @@ -import csv -import json -import os -from typing import List, Tuple - -import cv2 - -from dedoc.config import get_config -from dedoc.readers.pdf_reader.data_classes.tables.cell import Cell -from dedoc.readers.pdf_reader.data_classes.tables.scantable import ScanTable -from dedoc.readers.pdf_reader.pdf_image_reader.pdf_image_reader import PdfImageReader - - -def _create_cell(c: str, text_cells: list) -> Cell: - cell = Cell(x_bottom_right=-1, x_top_left=-1, y_top_left=-1, y_bottom_right=-1) - if "a" in c: - cell.is_attribute = True - # loading cell text - if len(text_cells) != 0: - cell_text = [r for r in text_cells if r[0] == c] - if len(cell_text) != 0: - cell.text = cell_text[0][-1] - return cell - - -def load_from_csv(path_csv: str, path_class_2_csv: str = "") -> List[List[Cell]]: - text_cells = [] - if path_class_2_csv != "": - csv_file_class_2 = open(path_class_2_csv, "r", newline="") - reader_class_2 = csv.reader(csv_file_class_2) - text_cells = [r for r in reader_class_2] - - matrix = [] - with open(path_csv, "r", newline="") as csv_file: - reader = csv.reader(csv_file) - - for raw in reader: - if len(raw) >= 5 and raw[0] == "bbox": - pass - else: - line = [_create_cell(c, text_cells) for c in raw if c != ""] - if len(line) != 0: - matrix.append(line) - return matrix - - -def 
get_quantitative_parameters(matrix: List[List[Cell]]) -> Tuple[int, int, int, int]: - cnt_a_cell, cnt_cell, cnt_columns, cnt_rows = 0, 0, 0, 0 - - # calculating data - if len(matrix) > 0: - cnt_columns = len(matrix[0]) - cnt_rows = len(matrix) - - for i in range(0, len(matrix)): - for j in range(0, len(matrix[i])): - if matrix[i][j].is_attribute: - cnt_a_cell += 1 - - cnt_cell += 1 - - return cnt_a_cell, cnt_cell, cnt_columns, cnt_rows - - -def calc_agreement(matrix_gt: List[List[Cell]], matrix: List[List[Cell]]) -> float: - q_params = get_quantitative_parameters(matrix) - q_params_gt = get_quantitative_parameters(matrix_gt) - - equal_indexes = [i for i in range(0, len(q_params)) if q_params[i] == q_params_gt[i]] - - agreement = 1.0 * len(equal_indexes) / len(q_params_gt) - return agreement - - -def draw_recognized_cell(tables: List[ScanTable], path_image: str, path_save: str) -> None: - img = cv2.imread(path_image) - for t_index in range(0, len(tables)): - table = tables[t_index].matrix_cells - bbox = tables[t_index].locations.location - blue_color, green_color, red_color = (255, 0, 0), (0, 255, 0), (0, 0, 255) - cv2.rectangle(img, (bbox.x_top_left, bbox.y_top_left), (bbox.width, bbox.height), blue_color, 6) - for i in range(0, len(table)): - for j in range(0, len(table[i])): - cv2.rectangle(img, (table[i][j].x_top_left, table[i][j].y_top_left), (table[i][j].x_bottom_right, table[i][j].y_bottom_right), red_color, 4) - cv2.putText(img, str(table[i][j].id_con), (table[i][j].x_top_left, table[i][j].y_bottom_right), cv2.FONT_HERSHEY_PLAIN, 4, green_color) - cv2.imwrite(path_save, img) - - -def save_json(tables: List[ScanTable], number_test_string: str, path_output: str) -> None: - for i in range(0, len(tables)): - with open(f"{path_output}{number_test_string}_table_{i}.json", "w") as out: - json.dump(tables[i].to_dict(), out, ensure_ascii=False, indent=2) - - -def calc_accuracy(path_image: str, path_gt_struct: str, path_gt_text: str, path_save_image: str, path_save_json: str) -> None: - from os import listdir - from os.path import isfile, join - - os.makedirs(path_save_image, exist_ok=True) - os.makedirs(path_save_json, exist_ok=True) - - image_files = [f for f in listdir(path_image) if isfile(join(path_image, f))] - agreements = [] - - for image_file in image_files: - name_example = image_file.split(".")[0].split("_")[0] - # predict tables - image = cv2.imread(path_image + image_file, 0) - # TODO fix this - clean_images, tables = PdfImageReader(config=get_config()).get_tables([image]) - draw_recognized_cell(tables, path_image + image_file, path_save_image + image_file) - save_json(tables, name_example, path_save_json) - - gt_files = [f for f in listdir(path_gt_struct) if isfile(join(path_gt_struct, f)) and name_example + "_" in f] - for index_table in range(0, len(gt_files)): - - csv_filename = path_gt_struct + name_example + "_" + str(index_table + 1) + ".csv" - csv_text_filename = path_gt_text + name_example + "_" + str(index_table + 1) + "_text.csv" - if os.path.exists(csv_filename): - if not os.path.exists(csv_text_filename): - csv_text_filename = "" - # load_GT - matrix_cell_gt = load_from_csv(csv_filename, csv_text_filename) - # calc agreement - if len(tables) == 0 and matrix_cell_gt == []: - agreements.append(1.0) - elif len(tables) <= index_table: - agreements.append(0) - else: - agreement = calc_agreement(matrix_cell_gt, tables[index_table].matrix_cells) - agreements.append(agreement) - - -if __name__ == "__main__": - current_path = os.path.dirname(__file__) + "/" - 
calc_accuracy(current_path + "../../backend/test_dataset_table/images/", - current_path + "../../backend/test_dataset_table/GT_struct/", - current_path + "../../backend/test_dataset_table/GT_text/", - "/tmp/backend_claw/out_tables/acc/draw_tables/", - "/tmp/backend_claw/out_tables/acc/json_tables/") diff --git a/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_utils/img_processing.py b/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_utils/img_processing.py index c060d9d6..6bc12eab 100644 --- a/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_utils/img_processing.py +++ b/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_utils/img_processing.py @@ -246,15 +246,9 @@ def __paint_bounds(image: np.ndarray) -> np.ndarray: return image -def detect_tables_by_contours(img: np.ndarray, - language: str = "rus", - orient_analysis_cells: bool = False, - table_type: str = "", - *, - config: dict) -> [TableTree, List[np.ndarray], float]: +def detect_tables_by_contours(img: np.ndarray, language: str = "rus", table_type: str = "", *, config: dict) -> [TableTree, List[np.ndarray], float]: """ detecting contours and TreeTable with help contour analysis. TreeTable is - :param orient_analysis_cells: :param img: input image :param language: parameter language for Tesseract :param config: dict from config.py diff --git a/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_utils/utils.py b/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_utils/utils.py index 19674772..693b8417 100644 --- a/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_utils/utils.py +++ b/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_utils/utils.py @@ -1,7 +1,9 @@ -import difflib +from typing import List, Tuple import numpy as np +from dedoc.readers.pdf_reader.data_classes.tables.cell import Cell + def equal_with_eps(x: int, y: int, eps: int = 10) -> bool: return y + eps >= x >= y - eps @@ -20,24 +22,19 @@ def get_highest_pixel_frequency(image: np.ndarray) -> int: def similarity(s1: str, s2: str) -> float: """string similarity""" + import difflib + normalized1 = s1.lower() normalized2 = s2.lower() matcher = difflib.SequenceMatcher(None, normalized1, normalized2) return matcher.ratio() -MINIMAL_CELL_CNT_LINE = 7 -MINIMAL_CELL_AVG_LENGTH_LINE = 10 - - -def detect_diff_orient(cell_text: str) -> bool: - # 1 - разбиваем на строки длины которых состоят хотя бы из одного символа - parts = cell_text.split("\n") - parts = [p for p in parts if len(p) > 0] +def get_statistic_values(cells: List[List[Cell]]) -> Tuple[int, int, int, int]: - # 2 - подсчитываем среднюю длину строк ячейки - len_parts = [len(p) for p in parts] - avg_len_part = np.average(len_parts) + cnt_rows = len(cells) + cnt_columns = len(cells[0]) if cnt_rows else 0 + cnt_cell = cnt_columns * cnt_rows + cnt_attr_cell = len([cell for row in cells for cell in row if cell.is_attribute]) - # Эвристика: считаем сто ячейка повернута если у нас большое количество строк и строки короткие - return len(parts) > MINIMAL_CELL_CNT_LINE and avg_len_part < MINIMAL_CELL_AVG_LENGTH_LINE + return cnt_attr_cell, cnt_cell, cnt_columns, cnt_rows diff --git a/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_tabby_reader.py b/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_tabby_reader.py index 1d0d594d..b60cbed7 100644 --- a/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_tabby_reader.py +++ b/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_tabby_reader.py @@ -29,6 +29,10 @@ class 
PdfTabbyReader(PdfBaseReader): def __init__(self, *, config: Optional[dict] = None) -> None: import os from dedoc.extensions import recognized_extensions, recognized_mimes + from dedoc.readers.pdf_reader.pdf_image_reader.table_recognizer.table_extractors.concrete_extractors.onepage_table_extractor import \ + OnePageTableExtractor + from dedoc.readers.pdf_reader.pdf_image_reader.table_recognizer.table_extractors.concrete_extractors.table_attribute_extractor import \ + TableHeaderExtractor super().__init__(config=config, recognized_extensions=recognized_extensions.pdf_like_format, recognized_mimes=recognized_mimes.pdf_like_format) self.tabby_java_version = "2.0.0" @@ -36,6 +40,8 @@ def __init__(self, *, config: Optional[dict] = None) -> None: self.jar_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "tabbypdf", "jars")) self.java_not_found_error = "`java` command is not found from this Python process. Please ensure Java is installed and PATH is set for `java`" self.default_config = {"JAR_PATH": os.path.join(self.jar_dir, self.jar_name)} + self.table_header_selector = TableHeaderExtractor(logger=self.logger) + self.table_extractor = OnePageTableExtractor(config=config, logger=self.logger) def can_read(self, file_path: Optional[str] = None, mime: Optional[str] = None, extension: Optional[str] = None, parameters: Optional[dict] = None) -> bool: """ @@ -132,9 +138,7 @@ def __extract(self, path: str, parameters: dict, warnings: List[str], tmp_dir: s mp_tables = self.table_recognizer.convert_to_multipages_tables(all_scan_tables, lines_with_meta=all_lines) all_lines = self.linker.link_objects(lines=all_lines, tables=mp_tables, images=all_attached_images) - tables = [scan_table.to_table() for scan_table in mp_tables] - - return all_lines, tables, all_attached_images, document_metadata + return all_lines, mp_tables, all_attached_images, document_metadata def __save_gost_frame_boxes_to_json(self, first_page: Optional[int], last_page: Optional[int], page_count: int, path: str, tmp_dir: str) -> str: from joblib import Parallel, delayed @@ -158,8 +162,7 @@ def __save_gost_frame_boxes_to_json(self, first_page: Optional[int], last_page: return result_json_path def __get_tables(self, page: dict) -> List[ScanTable]: - import uuid - from dedoc.data_structures.cell_with_meta import CellWithMeta + from dedoc.readers.pdf_reader.data_classes.tables.cell import Cell from dedoc.data_structures.concrete_annotations.bbox_annotation import BBoxAnnotation from dedoc.data_structures.line_metadata import LineMetadata @@ -170,7 +173,7 @@ def __get_tables(self, page: dict) -> List[ScanTable]: for table in page["tables"]: table_bbox = BBox(x_top_left=table["x_top_left"], y_top_left=table["y_top_left"], width=table["width"], height=table["height"]) - order = table["order"] # TODO add table order into TableMetadata + order = table["order"] rows = table["rows"] cell_properties = table["cell_properties"] assert len(rows) == len(cell_properties) @@ -187,20 +190,29 @@ def __get_tables(self, page: dict) -> List[ScanTable]: for c in cell_blocks: cell_bbox = BBox(x_top_left=int(c["x_top_left"]), y_top_left=int(c["y_top_left"]), width=int(c["width"]), height=int(c["height"])) annotations.append(BBoxAnnotation(c["start"], c["end"], cell_bbox, page_width=page_width, page_height=page_height)) - """ - TODO: change to Cell class after tabby can return cell coordinates. 
Then set type Cell in class "ScanTable" - https://jira.intra.ispras.ru/browse/TLDR-851 - """ - result_row.append(CellWithMeta( + current_cell_properties = cell_properties[num_row][num_col] + bbox = BBox(x_top_left=int(current_cell_properties["x_top_left"]), + y_top_left=int(current_cell_properties["y_top_left"]), + width=int(current_cell_properties["width"]), + height=int(current_cell_properties["height"])) + + result_row.append(Cell( + bbox=bbox, lines=[LineWithMeta(line=cell["text"], metadata=LineMetadata(page_id=page_number, line_id=0), annotations=annotations)], - colspan=cell_properties[num_row][num_col]["col_span"], - rowspan=cell_properties[num_row][num_col]["row_span"], - invisible=bool(cell_properties[num_row][num_col]["invisible"]) + colspan=current_cell_properties["col_span"], + rowspan=current_cell_properties["row_span"], + invisible=bool(current_cell_properties["invisible"]) )) cells.append(result_row) - scan_tables.append(ScanTable(page_number=page_number, matrix_cells=cells, bbox=table_bbox, name=str(uuid.uuid4()), order=order)) + try: + cells = self.table_extractor.handle_cells(cells) + scan_tables.append(ScanTable(page_number=page_number, cells=cells, bbox=table_bbox, order=order)) + except Exception as ex: + self.logger.warning(f"Warning: unrecognized table on page {self.page_number}. {ex}") + if self.config.get("debug_mode", False): + raise ex return scan_tables diff --git a/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_txtlayer_reader.py b/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_txtlayer_reader.py index 4cebbaf4..385f02a8 100644 --- a/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_txtlayer_reader.py +++ b/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_txtlayer_reader.py @@ -52,8 +52,6 @@ def _process_one_page(self, image=gray_image, page_number=page_number, language=parameters.language, - orient_analysis_cells=parameters.orient_analysis_cells, - orient_cell_angle=parameters.orient_cell_angle, table_type=parameters.table_type ) else: @@ -87,7 +85,7 @@ def _move_table_cells(self, tables: List[ScanTable], page_shift: BBox, page: Tup shift_x, shift_y = page_shift.x_top_left, page_shift.y_top_left # shift tables to original coordinates for location in table.locations: location.bbox.shift(shift_x=shift_x, shift_y=shift_y) - for row in table.matrix_cells: + for row in table.cells: for cell in row: cell.shift(shift_x=shift_x, shift_y=shift_y, image_width=image_width, image_height=image_height) @@ -97,7 +95,7 @@ def __change_table_boxes_page_width_heigth(self, pdf_width: int, pdf_height: int """ for table in tables: - for row in table.matrix_cells: + for row in table.cells: for cell in row: cell.change_lines_boxes_page_width_height(new_page_width=pdf_width, new_page_height=pdf_height) diff --git a/dedoc/readers/pdf_reader/pdf_txtlayer_reader/tabbypdf/jars/ispras_tbl_extr.jar b/dedoc/readers/pdf_reader/pdf_txtlayer_reader/tabbypdf/jars/ispras_tbl_extr.jar index 2b3c916fa11d5468b0e1ad78ec2cd0d938c7d843..8a737c79cc911778c955682ca553cd68e68059a0 100644 GIT binary patch delta 10750 zcmZ8{bzD?U6gGQFLAtwKS;ta~u6J|^*ujx?D;xd#- zwco}9o?^f{O?CGT;)fpReJhaEsq1pX_b)3bb`a`?>-46PJ1Q%4Qp-sPDI6eCKUq=` z@`}~XeOhoJ(v?6g`ec_ZM$h;#&2R9Ftx#*O23FARAd^0uecoxG9Sqw__tZhxjvzS&WSHFOpC5SBynUCTOXvbF7m{7H>Xt1}794i%o96FVlaxr@ zt13m}J4b_K&AB|dAY4~CR$ZpaFYp8bD(adh z-Y4}Pwqo{08)$xXwTn61G~-`*VpBjRye+<2T&qWvQ((s)A;bE`*H=!=z4D7i*ma=# z&|F&eJKZ|5;#ENWE&xhuIDzAjX^1yS-1V6HcY2?|I!!azuenu@rW2aLExAU#T4d)@ zUUc7S+Fl2wl2r0U0+KMZa-W-G{=(rKDelY?*eb)|rsjGhtDb9-9dumPJ2T~LcY$vC 
zvml4ybvNzs50H6MEGULdcNZ?GnSUJ>JQLp^Ze;8$$xoFw-%AKonyli6DGZTIy>=p9 zC$DNUG8)VZDkRCjfQnp-Cx$V|W zz!hgZO@BJExlADLwW4-%8fQovUWFBYbJ?{qI?t$tenwoe?kiyiO&zMA9d<^Y)iI8~ zX(ETM6v_)Zp9^!A(m2oY?=Vn*wGsZ_ADeg`nAR>SxAxQpFodSmSH6SgBYr4S30BTi z&r&qP#8xfRvZ?coFmz1U3L1lXdK{6VA{j8HEZDzHvmhX7S!}!^^}H!_7)2GZTck6t zYjq@BSCwBU;eUm&@^Ty^tSS|Q^jqu?6;39(N;WdyPguYWjw~Jzx9DrA*E7~QS@oM& zns2>cTwy!Q)4hReKT^N&0%c<;?d{^WNLwoegw6w@xPM(jFG;R z%jtcSc(?KARj#o-)=%;dZpDRcRp^NrEpNllbHei%21`}}xd3H124&p!m@82-OydvC z2t!Mdt3y(R@_xWD2yus`lgqbx8CWINJR|+`_*DiIrFjn20LjXrX3aqfB_v%1UonEc z>eSgEr$bwixj$@k))pHeO8rU_&)Xu$Pu!Yft>IClGo>o| z!qKIYdgrP5U3)ioNb>>p8!Mb0<*>7KcDHoYl(RBuB5Fl%yY)9|mPtOjCd{kajf1Uv zK^oPrphnGuIPsuHbX){V?zif7{FzM*^rgL2w7Wlt>q$rTP|Rnx9kxGiDJ-4;sBO|G zskLaT7%`AR36mYFXB?^*loGJ<4trTUKjW{f9ufIoqFnuJNV-L`MWLMfl1?p-<-)#d z{cPA5=p1M_%OrMm|EuJQhUm&t3~k<{CZuXw@+!YELc_eV3BE=mw)lC@Bp)@!(=VT8 zlgl`oX4JN-3UyQB&nnB$>(xD#C*O$#GR;v7!=B)A^@s>KuKlKG^C?MX_;bJP*3J?);M}hqe4RNqHDygTFcFrjOsmv2N2r?gT`VaD_G5-P7*ci0;MRU#?Uh z|L_(tT75!WLiDv0)9n>$?MK_{k0qEF_#fNOS#GBo4>i2ma6%kBxInVx$~kdI31VK) zw#H69Iy>ZJDaV?tMoBWfhOicn283B7`qN?I^wQNM?;hVxg3Acxp4aN4Ar10?^q4-~bj={zKq{p96s>Ae$+G4(?h2#K6bK{|pf%{At7kORWG*P}341hG>G0 zmVg4F-u~YS;DIH8LkTZ(X(Rm4kMw^q0)kIh0;HQ96t;rr#mCaUgV~l;E zm^DBFo?8KO5Lxh*HDC-d4gD2JNEjmhJCOar``;h1zYPF?L7VumAMi&s z46q0a*NS8dchn1{v;}w{!lG>p4@=Za0^|x0jIf1k`}%+r$XD?|eJ8lU30weZS^GO7 zfn=Q&G_;44N_KD(*o2e#puZg;1u*VDXjrlXQ~;&@zkEXFKFQz7OhEb}yib^LcS!z^ z@Ps{F`17xaN$4k(|B&1ca5DSsua<=Svwz7XI0>l-zc>K;z|{5MF_6{~kOrK8Kg=Uv zeVVZN8|xoG%N^kg%R}NF#co z{M);80u&I-(f;kBp+o-lA>JA8!wc}UGoTE43*I^dtPgQxb%DoiiW!u10XTsxX3)r!PA)p(hIh`0(^)^5)e>O5<&x#y8Um? zPRW2WZt&f^l>r0X04cD4o@YTo!2CZ+ zO$O$A0m8t~IB?Yqc=hZg9)e(N|K8MwUEK)8@`RtV*WM2cG4KWyA<KB= z@YO4Tp}v3s#0~87g str: return document_type -def get_param_orient_analysis_cells(parameters: Optional[dict]) -> bool: - if parameters is None: - return False - orient_analysis_cells = str(parameters.get("orient_analysis_cells", "False")).lower() == "true" - return orient_analysis_cells - - def get_param_with_attachments(parameters: Optional[dict]) -> bool: if parameters is None: return False @@ -80,16 +73,6 @@ def get_param_need_binarization(parameters: Optional[dict]) -> bool: return need_binarization -def get_param_orient_cell_angle(parameters: Optional[dict]) -> int: - if parameters is None: - return 90 - - orient_cell_angle = str(parameters.get("orient_cell_angle", "90")) - if orient_cell_angle == "": - orient_cell_angle = "90" - return int(orient_cell_angle) - - def get_param_is_one_column_document(parameters: Optional[dict]) -> Optional[bool]: if parameters is None: return None diff --git a/docs/source/dedoc_api_usage/api.rst b/docs/source/dedoc_api_usage/api.rst index c357ac78..c61a6e01 100644 --- a/docs/source/dedoc_api_usage/api.rst +++ b/docs/source/dedoc_api_usage/api.rst @@ -150,7 +150,7 @@ Api parameters description The encoded contents will be saved in the attachment's metadata in the ``base64_encode`` field. Use ``true`` value to enable this behaviour. - * - :cspan:`3` **Tables handling** + * - :cspan:`3` **PDF handling** * - need_pdf_table_analysis - true, false @@ -162,26 +162,6 @@ Api parameters description If the document has a textual layer, it is recommended to use ``pdf_with_text_layer=tabby``, in this case tables will be parsed much easier and faster. - * - orient_analysis_cells - - true, false - - false - - This option is used for a table recognition in case of PDF documents without a textual layer - (images, scanned documents or when ``pdf_with_text_layer`` is ``true``, ``false`` or ``auto``). 
-      When set to ``true``, it enables analysis of rotated cells in table headers.
-      Use this option if you are sure that the cells of the table header are rotated.
-
-  * - orient_cell_angle
-    - 90, 270
-    - 90
-    - This option is used for a table recognition in case of PDF documents without a textual layer
-      (images, scanned documents or when ``pdf_with_text_layer`` is ``true``, ``false`` or ``auto``).
-      It is ignored when ``orient_analysis_cells=false``.
-      The option is used to set orientation of cells in table headers:
-
-      * **270** -- cells are rotated 90 degrees clockwise;
-      * **90** -- cells are rotated 90 degrees counterclockwise (or 270 clockwise).
-
-  * - :cspan:`3` **PDF handling**
 
   * - pdf_with_text_layer
     - true, false, tabby, auto, auto_tabby
diff --git a/docs/source/parameters/pdf_handling.rst b/docs/source/parameters/pdf_handling.rst
index 20fabec9..46c03416 100644
--- a/docs/source/parameters/pdf_handling.rst
+++ b/docs/source/parameters/pdf_handling.rst
@@ -161,30 +161,6 @@ PDF and images handling
       It allows :class:`dedoc.readers.PdfImageReader`, :class:`dedoc.readers.PdfTxtlayerReader` and :class:`dedoc.readers.PdfTabbyReader`
       to properly process the content of the document containing GOST frame, see :ref:`gost_frame_handling` for more details.
 
-  * - orient_analysis_cells
-    - True, False
-    - False
-    - * :meth:`dedoc.DedocManager.parse`
-      * :meth:`dedoc.readers.PdfAutoReader.read`, :meth:`dedoc.readers.PdfTxtlayerReader.read`, :meth:`dedoc.readers.PdfImageReader.read`
-      * :meth:`dedoc.readers.ReaderComposition.read`
-    - This option is used for a table recognition for PDF documents or images.
-      It is ignored when ``need_pdf_table_analysis=False``.
-      When set to ``True``, it enables analysis of rotated cells in table headers.
-      Use this option if you are sure that the cells of the table header are rotated.
-
-  * - orient_cell_angle
-    - 90, 270
-    - 90
-    - * :meth:`dedoc.DedocManager.parse`
-      * :meth:`dedoc.readers.PdfAutoReader.read`, :meth:`dedoc.readers.PdfTxtlayerReader.read`, :meth:`dedoc.readers.PdfImageReader.read`
-      * :meth:`dedoc.readers.ReaderComposition.read`
-    - This option is used for a table recognition for PDF documents or images.
-      It is ignored when ``need_pdf_table_analysis=False`` or ``orient_analysis_cells=False``.
-      The option is used to set orientation of cells in table headers:
-
-      * **270** -- cells are rotated 90 degrees clockwise;
-      * **90** -- cells are rotated 90 degrees counterclockwise (or 270 clockwise).
-
 
 .. toctree::
    :maxdepth: 1
diff --git a/tests/api_tests/test_api_format_pdf_tabby_reader.py b/tests/api_tests/test_api_format_pdf_tabby_reader.py
index b2ff91a6..959e15ca 100644
--- a/tests/api_tests/test_api_format_pdf_tabby_reader.py
+++ b/tests/api_tests/test_api_format_pdf_tabby_reader.py
@@ -182,7 +182,7 @@ def test_pdf_with_tables(self) -> None:
 
         table = tables[3]["cells"]
         self.assertListEqual(["", "2016", "2017", "2018", "2019"], self._get_text_of_row(table[0]))
-        self.assertListEqual(["", "Прогноз", "Прогноз бюджета"], self._get_text_of_row(table[1]))
+        self.assertListEqual(["", "Прогноз", "Прогноз бюджета", "Прогноз бюджета", "Прогноз бюджета"], self._get_text_of_row(table[1]))
         self.assertListEqual(["Ненефтегазов\nые доходы", "10,4", "9,6", "9,6", "9,6"], self._get_text_of_row(table[21]))
         self.assertListEqual(["Сальдо\nбюджета", "-3,7", "-3,2", "-2,2", "-1,2"], self._get_text_of_row(table[22]))
 
@@ -227,7 +227,7 @@ def test_tables_with_merged_cells(self) -> None:
         result = self._send_request(file_name, data=dict(pdf_with_text_layer="tabby"))
         table = result["content"]["tables"][0]["cells"]
 
-        hidden_cells_big_table_with_colspan = [[(1, 0), 10], [(5, 1), 5]]
+        hidden_cells_big_table_with_colspan = [[(1, 0), 10], [(5, 5), 5]]
 
         for (i, j), k in hidden_cells_big_table_with_colspan:
             self.assertFalse(table[i][j]["invisible"])
diff --git a/tests/api_tests/test_api_misc_multipage_table.py b/tests/api_tests/test_api_misc_multipage_table.py
index 5c3c0d2e..ef64fb09 100644
--- a/tests/api_tests/test_api_misc_multipage_table.py
+++ b/tests/api_tests/test_api_misc_multipage_table.py
@@ -1,4 +1,5 @@
 import os
+import unittest
 from typing import List
 
 from tests.api_tests.abstract_api_test import AbstractTestApiDocReader
@@ -45,14 +46,13 @@ def test_api_ml_table_recognition_synthetic_data_1(self) -> None:
             tables = self._get_tables(file_name, pdf_with_text_layer=pdf_param)
             self.assertEqual(len(tables), 1)
 
+    @unittest.skip("TLDR-886 подправить координаты ячеек таблиц табби")
     def test_api_ml_table_recognition_synthetic_data_3(self) -> None:
         file_name = "example_mp_table_with_repeate_header_2.pdf"
-        for pdf_param in ["false", "true"]:
-            # for "tabby" doesn't work because need to unify the output of table in matrix form and set attribute cells,
-            # without this tables won't be merge.
+ for pdf_param in ["false", "true", "tabby"]: tables = self._get_tables(file_name, pdf_with_text_layer=pdf_param) - self.assertEqual(len(tables), 1) + self.assertEqual(len(tables), 1, f"Error when pdf_with_text_layer={pdf_param}") table = tables[0]["cells"] self.assertListEqual( @@ -67,8 +67,5 @@ def test_api_ml_table_recognition_synthetic_data_3(self) -> None: self.assertListEqual(["Данные 3", "Данные 3", "Данные 3", "Данные 3", "Данные 3"], self._get_text_of_row(table[5])) self.assertListEqual(["Данные 4", "Данные 4", "Данные 4", "Данные 4", "Данные 4"], self._get_text_of_row(table[6])) self.assertListEqual(["Данные 5", "Данные 5", "Данные 5", "Данные 5", "Данные 5"], self._get_text_of_row(table[7])) - self.assertListEqual(["Заголовок\nБольшой", "Заголовок поменьше 1", "Заголовок поменьше 1", "Заголовок поменьше 2", "Заголовок поменьше 2"], - self._get_text_of_row(table[8])) - self.assertListEqual(["Заголовок\nБольшой", "Заголовочек 1", "Заголовочек 2", "Заголовочек 3", "Заголовочек 4"], self._get_text_of_row(table[9])) - self.assertListEqual(["Данные 6", "Данные 6", "Данные 6", "Данные 6", "Данные 6"], self._get_text_of_row(table[10])) - self.assertListEqual(["Данные 7", "Данные 7", "Данные 7", "Данные 7", "Данные 7"], self._get_text_of_row(table[11])) + self.assertListEqual(["Данные 6", "Данные 6", "Данные 6", "Данные 6", "Данные 6"], self._get_text_of_row(table[8])) + self.assertListEqual(["Данные 7", "Данные 7", "Данные 7", "Данные 7", "Данные 7"], self._get_text_of_row(table[9])) diff --git a/tests/api_tests/test_api_module_table_recognizer.py b/tests/api_tests/test_api_module_table_recognizer.py index a73b4ee5..a6f48a70 100644 --- a/tests/api_tests/test_api_module_table_recognizer.py +++ b/tests/api_tests/test_api_module_table_recognizer.py @@ -1,6 +1,5 @@ import json import os -import unittest from typing import List from tests.api_tests.abstract_api_test import AbstractTestApiDocReader @@ -98,21 +97,6 @@ def _check_header_table(self, cells: List[dict]) -> None: self._check_similarity(row0[9], "Систетематический\nконтроль") self._check_similarity(row0[10], "Экспертная оценка") - @unittest.skip("TODO") - def test_api_table_recognition_with_diff_orient_cells_90(self) -> None: - file_name = "example_table_with_90_orient_cells.pdf" - response = self._send_request(file_name, dict(orient_analysis_cells=True, orient_cell_angle="90")) - table = response["content"]["tables"][0] - - self._check_header_table(table["cells"]) - - @unittest.skip - def test_api_table_recognition_with_diff_orient_cells_270(self) -> None: - file_name = "example_table_with_270_orient_cells.pdf" - response = self._send_request(file_name, dict(orient_analysis_cells=True, orient_cell_angle="270")) - table = response["content"]["tables"][0] - self._check_header_table(table["cells"]) - def test_pdf_table(self) -> None: file_name = "example_with_table1.pdf" result = self._send_request(file_name) diff --git a/tests/unit_tests/test_module_cell_splitter.py b/tests/unit_tests/test_module_cell_splitter.py index ad48952a..36113dbc 100644 --- a/tests/unit_tests/test_module_cell_splitter.py +++ b/tests/unit_tests/test_module_cell_splitter.py @@ -1,5 +1,7 @@ import unittest +from dedocutils.data_structures import BBox + from dedoc.readers.pdf_reader.data_classes.tables.cell import Cell from dedoc.readers.pdf_reader.pdf_image_reader.table_recognizer.cell_splitter import CellSplitter @@ -10,42 +12,42 @@ class TestCellSplitter(unittest.TestCase): def test_merge_close_borders(self) -> None: cells = [ [ - Cell(x_top_left=0, 
y_top_left=0, x_bottom_right=50, y_bottom_right=30), - Cell(x_top_left=51, y_top_left=2, x_bottom_right=90, y_bottom_right=29) + Cell(BBox(x_top_left=0, y_top_left=0, width=50, height=30)), + Cell(BBox(x_top_left=51, y_top_left=2, width=39, height=27)) ], [ - Cell(x_top_left=0, y_top_left=31, x_bottom_right=50, y_bottom_right=50), - Cell(x_top_left=51, y_top_left=31, x_bottom_right=91, y_bottom_right=50) + Cell(BBox(x_top_left=0, y_top_left=31, width=50, height=19)), + Cell(BBox(x_top_left=51, y_top_left=31, width=40, height=19)) ] ] cells_merged = self.splitter._merge_close_borders(cells) - self.assertEqual(0, cells_merged[0][0].x_top_left) - self.assertEqual(0, cells_merged[0][0].y_top_left) - self.assertEqual(50, cells_merged[0][0].x_bottom_right) - self.assertEqual(29, cells_merged[0][0].y_bottom_right) - - self.assertEqual(50, cells_merged[0][1].x_top_left) - self.assertEqual(0, cells_merged[0][1].y_top_left) - self.assertEqual(90, cells_merged[0][1].x_bottom_right) - self.assertEqual(29, cells_merged[0][1].y_bottom_right) - - self.assertEqual(0, cells_merged[1][0].x_top_left) - self.assertEqual(29, cells_merged[1][0].y_top_left) - self.assertEqual(50, cells_merged[1][0].x_bottom_right) - self.assertEqual(50, cells_merged[1][0].y_bottom_right) - - self.assertEqual(50, cells_merged[1][1].x_top_left) - self.assertEqual(29, cells_merged[1][1].y_top_left) - self.assertEqual(90, cells_merged[1][1].x_bottom_right) - self.assertEqual(50, cells_merged[1][1].y_bottom_right) + self.assertEqual(0, cells_merged[0][0].bbox.x_top_left) + self.assertEqual(0, cells_merged[0][0].bbox.y_top_left) + self.assertEqual(50, cells_merged[0][0].bbox.x_bottom_right) + self.assertEqual(29, cells_merged[0][0].bbox.y_bottom_right) + + self.assertEqual(50, cells_merged[0][1].bbox.x_top_left) + self.assertEqual(0, cells_merged[0][1].bbox.y_top_left) + self.assertEqual(90, cells_merged[0][1].bbox.x_bottom_right) + self.assertEqual(29, cells_merged[0][1].bbox.y_bottom_right) + + self.assertEqual(0, cells_merged[1][0].bbox.x_top_left) + self.assertEqual(29, cells_merged[1][0].bbox.y_top_left) + self.assertEqual(50, cells_merged[1][0].bbox.x_bottom_right) + self.assertEqual(50, cells_merged[1][0].bbox.y_bottom_right) + + self.assertEqual(50, cells_merged[1][1].bbox.x_top_left) + self.assertEqual(29, cells_merged[1][1].bbox.y_top_left) + self.assertEqual(90, cells_merged[1][1].bbox.x_bottom_right) + self.assertEqual(50, cells_merged[1][1].bbox.y_bottom_right) def test_merge_close_borders_one_cell(self) -> None: - cells = [[Cell(x_top_left=0, y_top_left=0, x_bottom_right=50, y_bottom_right=30)]] + cells = [[Cell(BBox(x_top_left=0, y_top_left=0, width=50, height=30))]] cells_merged = self.splitter._merge_close_borders(cells) - self.assertEqual(0, cells_merged[0][0].x_top_left) - self.assertEqual(0, cells_merged[0][0].y_top_left) - self.assertEqual(50, cells_merged[0][0].x_bottom_right) - self.assertEqual(30, cells_merged[0][0].y_bottom_right) + self.assertEqual(0, cells_merged[0][0].bbox.x_top_left) + self.assertEqual(0, cells_merged[0][0].bbox.y_top_left) + self.assertEqual(50, cells_merged[0][0].bbox.x_bottom_right) + self.assertEqual(30, cells_merged[0][0].bbox.y_bottom_right) def test_merge_zero_cells(self) -> None: cells = [[]] @@ -58,24 +60,24 @@ def test_split_zero_cells(self) -> None: self.assertListEqual([[]], matrix) def test_split_one_cell(self) -> None: - cells = [[Cell(x_top_left=0, y_top_left=0, x_bottom_right=10, y_bottom_right=15)]] + cells = [[Cell(BBox(x_top_left=0, y_top_left=0, width=10, height=15))]] 
matrix = self.splitter.split(cells=cells) self.assertEqual(1, len(matrix)) self.assertEqual(1, len(matrix[0])) new_cell = matrix[0][0] - self.assertEqual(0, new_cell.x_top_left) - self.assertEqual(0, new_cell.y_top_left) - self.assertEqual(10, new_cell.x_bottom_right) - self.assertEqual(15, new_cell.y_bottom_right) + self.assertEqual(0, new_cell.bbox.x_top_left) + self.assertEqual(0, new_cell.bbox.y_top_left) + self.assertEqual(10, new_cell.bbox.x_bottom_right) + self.assertEqual(15, new_cell.bbox.y_bottom_right) def test_horizontal_split(self) -> None: cells = [ [ - Cell(x_top_left=0, y_top_left=0, x_bottom_right=3, y_bottom_right=5), - Cell(x_top_left=3, y_top_left=0, x_bottom_right=7, y_bottom_right=3), + Cell(BBox(x_top_left=0, y_top_left=0, width=3, height=5)), + Cell(BBox(x_top_left=3, y_top_left=0, width=4, height=3)), ], [ - Cell(x_top_left=3, y_top_left=3, x_bottom_right=7, y_bottom_right=5), + Cell(BBox(x_top_left=3, y_top_left=3, width=4, height=2)), ] ] matrix = self.splitter.split(cells) @@ -83,34 +85,34 @@ def test_horizontal_split(self) -> None: self.assertEqual(2, len(matrix[0])) self.assertEqual(2, len(matrix[1])) [cell_a, cell_b], [cell_c, cell_d] = matrix - self.assertEqual(0, cell_a.x_top_left) - self.assertEqual(0, cell_a.y_top_left) - self.assertEqual(3, cell_a.x_bottom_right) - self.assertEqual(3, cell_a.y_bottom_right) - - self.assertEqual(3, cell_b.x_top_left) - self.assertEqual(0, cell_b.y_top_left) - self.assertEqual(7, cell_b.x_bottom_right) - self.assertEqual(3, cell_b.y_bottom_right) - - self.assertEqual(0, cell_c.x_top_left) - self.assertEqual(3, cell_c.y_top_left) - self.assertEqual(3, cell_c.x_bottom_right) - self.assertEqual(5, cell_c.y_bottom_right) - - self.assertEqual(3, cell_d.x_top_left) - self.assertEqual(3, cell_d.y_top_left) - self.assertEqual(7, cell_d.x_bottom_right) - self.assertEqual(5, cell_d.y_bottom_right) + self.assertEqual(0, cell_a.bbox.x_top_left) + self.assertEqual(0, cell_a.bbox.y_top_left) + self.assertEqual(3, cell_a.bbox.x_bottom_right) + self.assertEqual(3, cell_a.bbox.y_bottom_right) + + self.assertEqual(3, cell_b.bbox.x_top_left) + self.assertEqual(0, cell_b.bbox.y_top_left) + self.assertEqual(7, cell_b.bbox.x_bottom_right) + self.assertEqual(3, cell_b.bbox.y_bottom_right) + + self.assertEqual(0, cell_c.bbox.x_top_left) + self.assertEqual(3, cell_c.bbox.y_top_left) + self.assertEqual(3, cell_c.bbox.x_bottom_right) + self.assertEqual(5, cell_c.bbox.y_bottom_right) + + self.assertEqual(3, cell_d.bbox.x_top_left) + self.assertEqual(3, cell_d.bbox.y_top_left) + self.assertEqual(7, cell_d.bbox.x_bottom_right) + self.assertEqual(5, cell_d.bbox.y_bottom_right) def test_vertical_split(self) -> None: cells = [ [ - Cell(x_top_left=0, y_top_left=0, x_bottom_right=8, y_bottom_right=2), + Cell(BBox(x_top_left=0, y_top_left=0, width=8, height=2)), ], [ - Cell(x_top_left=0, y_top_left=2, x_bottom_right=5, y_bottom_right=5), - Cell(x_top_left=5, y_top_left=2, x_bottom_right=8, y_bottom_right=5), + Cell(BBox(x_top_left=0, y_top_left=2, width=5, height=3)), + Cell(BBox(x_top_left=5, y_top_left=2, width=3, height=3)), ] ] matrix = self.splitter.split(cells) @@ -118,35 +120,35 @@ def test_vertical_split(self) -> None: self.assertEqual(2, len(matrix[0])) self.assertEqual(2, len(matrix[1])) [cell_a, cell_b], [cell_c, cell_d] = matrix - self.assertEqual(0, cell_a.x_top_left) - self.assertEqual(0, cell_a.y_top_left) - self.assertEqual(5, cell_a.x_bottom_right) - self.assertEqual(2, cell_a.y_bottom_right) - - self.assertEqual(5, cell_b.x_top_left) - 
self.assertEqual(0, cell_b.y_top_left) - self.assertEqual(8, cell_b.x_bottom_right) - self.assertEqual(2, cell_b.y_bottom_right) - - self.assertEqual(0, cell_c.x_top_left) - self.assertEqual(2, cell_c.y_top_left) - self.assertEqual(5, cell_c.x_bottom_right) - self.assertEqual(5, cell_c.y_bottom_right) - - self.assertEqual(5, cell_d.x_top_left) - self.assertEqual(2, cell_d.y_top_left) - self.assertEqual(8, cell_d.x_bottom_right) - self.assertEqual(5, cell_d.y_bottom_right) + self.assertEqual(0, cell_a.bbox.x_top_left) + self.assertEqual(0, cell_a.bbox.y_top_left) + self.assertEqual(5, cell_a.bbox.x_bottom_right) + self.assertEqual(2, cell_a.bbox.y_bottom_right) + + self.assertEqual(5, cell_b.bbox.x_top_left) + self.assertEqual(0, cell_b.bbox.y_top_left) + self.assertEqual(8, cell_b.bbox.x_bottom_right) + self.assertEqual(2, cell_b.bbox.y_bottom_right) + + self.assertEqual(0, cell_c.bbox.x_top_left) + self.assertEqual(2, cell_c.bbox.y_top_left) + self.assertEqual(5, cell_c.bbox.x_bottom_right) + self.assertEqual(5, cell_c.bbox.y_bottom_right) + + self.assertEqual(5, cell_d.bbox.x_top_left) + self.assertEqual(2, cell_d.bbox.y_top_left) + self.assertEqual(8, cell_d.bbox.x_bottom_right) + self.assertEqual(5, cell_d.bbox.y_bottom_right) def test_no_split(self) -> None: cells = [ [ - Cell(x_top_left=160, y_top_left=321, x_bottom_right=825, y_bottom_right=369), - Cell(x_top_left=825, y_top_left=321, x_bottom_right=1494, y_bottom_right=369) + Cell(BBox(x_top_left=160, y_top_left=321, width=665, height=48)), + Cell(BBox(x_top_left=825, y_top_left=321, width=669, height=48)) ], [ - Cell(x_top_left=160, y_top_left=374, x_bottom_right=825, y_bottom_right=423), - Cell(x_top_left=825, y_top_left=374, x_bottom_right=1494, y_bottom_right=423) + Cell(BBox(x_top_left=160, y_top_left=374, width=665, height=49)), + Cell(BBox(x_top_left=825, y_top_left=374, width=669, height=49)) ] ] diff --git a/tests/unit_tests/test_module_gost_frame_recognizer.py b/tests/unit_tests/test_module_gost_frame_recognizer.py index a2c33f09..1ac3a7c2 100644 --- a/tests/unit_tests/test_module_gost_frame_recognizer.py +++ b/tests/unit_tests/test_module_gost_frame_recognizer.py @@ -31,8 +31,6 @@ def _get_params_for_parse(self, parameters: Optional[dict], file_path: Optional[ file_path = file_path if file_path else "" params_for_parse = ParametersForParseDoc( language=param_utils.get_param_language(parameters), - orient_analysis_cells=param_utils.get_param_orient_analysis_cells(parameters), - orient_cell_angle=param_utils.get_param_orient_cell_angle(parameters), is_one_column_document=param_utils.get_param_is_one_column_document(parameters), document_orientation=param_utils.get_param_document_orientation(parameters), need_header_footers_analysis=param_utils.get_param_need_header_footers_analysis(parameters), diff --git a/tests/unit_tests/test_module_table_detection.py b/tests/unit_tests/test_module_table_detection.py index 0aef1be0..29d2e8da 100644 --- a/tests/unit_tests/test_module_table_detection.py +++ b/tests/unit_tests/test_module_table_detection.py @@ -7,13 +7,12 @@ from dedoc.readers.pdf_reader.data_classes.tables.scantable import ScanTable from dedoc.readers.pdf_reader.pdf_image_reader.table_recognizer.table_recognizer import TableRecognizer -from dedoc.readers.pdf_reader.pdf_image_reader.table_recognizer.table_utils.accuracy_table_rec import get_quantitative_parameters -from dedoc.readers.pdf_reader.pdf_image_reader.table_recognizer.table_utils.utils import equal_with_eps, similarity as utils_similarity +from 
dedoc.readers.pdf_reader.pdf_image_reader.table_recognizer.table_utils.utils import equal_with_eps, get_statistic_values, similarity as sim from tests.test_utils import get_full_path, get_test_config def similarity(s1: str, s2: str, threshold: float = 0.8) -> bool: - return True if utils_similarity(s1, s2) > threshold else False + return True if sim(s1, s2) > threshold else False class TestRecognizedTable(unittest.TestCase): @@ -21,12 +20,7 @@ class TestRecognizedTable(unittest.TestCase): table_recognizer = TableRecognizer(config=get_test_config()) def get_table(self, image: np.ndarray, language: str = "rus", table_type: str = "") -> List[ScanTable]: - image, tables = self.table_recognizer.recognize_tables_from_image(image=image, - page_number=0, - language=language, - orient_analysis_cells=False, - orient_cell_angle=0, - table_type=table_type) + image, tables = self.table_recognizer.recognize_tables_from_image(image=image, page_number=0, language=language, table_type=table_type) return tables def test_table_wo_external_bounds(self) -> None: @@ -50,13 +44,13 @@ def test_table_split_right_column(self) -> None: image = cv2.imread(path_image, 0) tables = self.get_table(image, "rus+eng", table_type="split_last_column+wo_external_bounds") - self.assertTrue(tables[0].matrix_cells[4][-1].get_text(), "40703978900000345077") - self.assertTrue(tables[0].matrix_cells[5][-1].get_text(), "049401814") - self.assertTrue(tables[0].matrix_cells[6][-1].get_text(), "30101810200000000814") - self.assertTrue(tables[0].matrix_cells[7][-1].get_text(), "049401814") - self.assertTrue(tables[0].matrix_cells[8][-1].get_text(), "30101810200000000814") - self.assertTrue(tables[0].matrix_cells[9][-1].get_text(), "30110978700000070815") - self.assertTrue(tables[0].matrix_cells[10][-1].get_text(), "30110978700000070815") + self.assertTrue(tables[0].cells[4][-1].get_text(), "40703978900000345077") + self.assertTrue(tables[0].cells[5][-1].get_text(), "049401814") + self.assertTrue(tables[0].cells[6][-1].get_text(), "30101810200000000814") + self.assertTrue(tables[0].cells[7][-1].get_text(), "049401814") + self.assertTrue(tables[0].cells[8][-1].get_text(), "30101810200000000814") + self.assertTrue(tables[0].cells[9][-1].get_text(), "30110978700000070815") + self.assertTrue(tables[0].cells[10][-1].get_text(), "30110978700000070815") def test_table_extract_one_cell_and_one_cell_tables(self) -> None: path_image = get_full_path("data/lising/platezhka.jpg") @@ -115,73 +109,73 @@ def test_table_recognition_1(self) -> None: image = cv2.imread(get_full_path("data/tables/example_with_table3.png"), 0) tables = self.get_table(image) - cnt_a_cell, cnt_cell, cnt_columns, cnt_rows = get_quantitative_parameters(tables[0].matrix_cells) + cnt_a_cell, cnt_cell, cnt_columns, cnt_rows = get_statistic_values(tables[0].cells) self.assertEqual(cnt_rows, 8) self.assertEqual(cnt_columns, 3) self.assertEqual(cnt_a_cell, 3) self.assertEqual(cnt_cell, 24) - self.assertTrue(similarity(tables[0].matrix_cells[0][1].get_text(), "Наименование данных")) - self.assertTrue(similarity(tables[0].matrix_cells[0][2].get_text(), "Данные")) - self.assertTrue(similarity(tables[0].matrix_cells[4][1].get_text().capitalize(), "Инн")) - self.assertTrue(similarity(tables[0].matrix_cells[3][1].get_text(), "Руководитель (ФИО, телефон,\nфакс, электронный адрес)")) + self.assertTrue(similarity(tables[0].cells[0][1].get_text(), "Наименование данных")) + self.assertTrue(similarity(tables[0].cells[0][2].get_text(), "Данные")) + 
self.assertTrue(similarity(tables[0].cells[4][1].get_text().capitalize(), "Инн")) + self.assertTrue(similarity(tables[0].cells[3][1].get_text(), "Руководитель (ФИО, телефон,\nфакс, электронный адрес)")) def test_table_recognition_2(self) -> None: image = cv2.imread(get_full_path("data/tables/example_with_table4.jpg"), 0) tables = self.get_table(image) - cnt_a_cell, cnt_cell, cnt_columns, cnt_rows = get_quantitative_parameters(tables[0].matrix_cells) + cnt_a_cell, cnt_cell, cnt_columns, cnt_rows = get_statistic_values(tables[0].cells) self.assertEqual(cnt_rows, 5) self.assertEqual(cnt_columns, 3) self.assertEqual(cnt_a_cell, 3) self.assertEqual(cnt_cell, 15) - self.assertTrue(similarity(tables[0].matrix_cells[0][1].get_text(), "Перечень основных данных и\nтребований")) - self.assertTrue(similarity(tables[0].matrix_cells[0][2].get_text(), "Основные данные и требования")) - self.assertTrue(similarity(tables[0].matrix_cells[3][1].get_text(), "Количество")) - self.assertTrue(similarity(tables[0].matrix_cells[4][1].get_text(), "Технические параметры оборудования")) + self.assertTrue(similarity(tables[0].cells[0][1].get_text(), "Перечень основных данных и\nтребований")) + self.assertTrue(similarity(tables[0].cells[0][2].get_text(), "Основные данные и требования")) + self.assertTrue(similarity(tables[0].cells[3][1].get_text(), "Количество")) + self.assertTrue(similarity(tables[0].cells[4][1].get_text(), "Технические параметры оборудования")) def test_table_recognition_3(self) -> None: image = cv2.imread(get_full_path("data/tables/example_with_table5.png"), 0) tables = self.get_table(image) - cnt_a_cell, cnt_cell, cnt_columns, cnt_rows = get_quantitative_parameters(tables[0].matrix_cells) + cnt_a_cell, cnt_cell, cnt_columns, cnt_rows = get_statistic_values(tables[0].cells) self.assertEqual(cnt_rows, 13) self.assertEqual(cnt_columns, 3) self.assertEqual(cnt_a_cell, 3) self.assertEqual(cnt_cell, 39) - self.assertTrue(similarity(tables[0].matrix_cells[0][1].get_text(), "Техническая характеристика")) - self.assertTrue(similarity(tables[0].matrix_cells[0][2].get_text(), "Показатель")) - self.assertTrue(similarity(tables[0].matrix_cells[6][1].get_text(), "Использование крана и его механизмов")) - self.assertTrue(similarity(tables[0].matrix_cells[7][1].get_text(), "Тип привода:")) + self.assertTrue(similarity(tables[0].cells[0][1].get_text(), "Техническая характеристика")) + self.assertTrue(similarity(tables[0].cells[0][2].get_text(), "Показатель")) + self.assertTrue(similarity(tables[0].cells[6][1].get_text(), "Использование крана и его механизмов")) + self.assertTrue(similarity(tables[0].cells[7][1].get_text(), "Тип привода:")) def test_table_recognition_4(self) -> None: image = cv2.imread(get_full_path("data/tables/example_with_table5.png"), 0) tables = self.get_table(image) - cnt_a_cell, cnt_cell, cnt_columns, cnt_rows = get_quantitative_parameters(tables[0].matrix_cells) + cnt_a_cell, cnt_cell, cnt_columns, cnt_rows = get_statistic_values(tables[0].cells) self.assertEqual(cnt_rows, 13) self.assertEqual(cnt_columns, 3) self.assertEqual(cnt_a_cell, 3) self.assertEqual(cnt_cell, 39) - self.assertTrue(similarity(tables[0].matrix_cells[0][1].get_text(), "Техническая характеристика")) - self.assertTrue(similarity(tables[0].matrix_cells[0][2].get_text(), "Показатель")) - self.assertTrue(similarity(tables[0].matrix_cells[6][1].get_text(), "Использование крана и его механизмов")) - self.assertTrue(similarity(tables[0].matrix_cells[7][1].get_text(), "Тип привода:")) + 
self.assertTrue(similarity(tables[0].cells[0][1].get_text(), "Техническая характеристика"))
+        self.assertTrue(similarity(tables[0].cells[0][2].get_text(), "Показатель"))
+        self.assertTrue(similarity(tables[0].cells[6][1].get_text(), "Использование крана и его механизмов"))
+        self.assertTrue(similarity(tables[0].cells[7][1].get_text(), "Тип привода:"))
 
     def test_table_recognition_with_rotate_5(self) -> None:
         image = cv2.imread(get_full_path("data/tables/example_with_table6.png"), 0)
         tables = self.get_table(image)
 
-        cnt_a_cell, cnt_cell, cnt_columns, cnt_rows = get_quantitative_parameters(tables[0].matrix_cells)
+        cnt_a_cell, cnt_cell, cnt_columns, cnt_rows = get_statistic_values(tables[0].cells)
 
         self.assertEqual(cnt_rows, 3)
         self.assertEqual(cnt_columns, 7)
         self.assertEqual(cnt_a_cell, 7)
         self.assertEqual(cnt_cell, 21)
 
-        self.assertTrue(similarity(tables[0].matrix_cells[0][1].get_text(), "Группа"))
-        self.assertTrue(similarity(tables[0].matrix_cells[0][3].get_text(), "Наименование"))
-        self.assertTrue(similarity(tables[0].matrix_cells[2][2].get_text(), "Новая\nпозиция"))
-        self.assertTrue(similarity(tables[0].matrix_cells[2][5].get_text(), "3 (три)\nшт."))
+        self.assertTrue(similarity(tables[0].cells[0][1].get_text(), "Группа"))
+        self.assertTrue(similarity(tables[0].cells[0][3].get_text(), "Наименование"))
+        self.assertTrue(similarity(tables[0].cells[2][2].get_text(), "Новая\nпозиция"))
+        self.assertTrue(similarity(tables[0].cells[2][5].get_text(), "3 (три)\nшт."))

From cedbfc0bd3e0040d6c24eca4498e34802b225aa7 Mon Sep 17 00:00:00 2001
From: "Zykina (Bogatenkova) Anastasiya"
Date: Wed, 25 Dec 2024 12:09:26 +0300
Subject: [PATCH 4/4] new version 2.3.2 (#512)

---
 VERSION                   |  2 +-
 docs/source/changelog.rst | 17 ++++++++++++-----
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/VERSION b/VERSION
index a6254504..e7034819 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.3.1
\ No newline at end of file
+2.3.2
\ No newline at end of file
diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index 3ecf0ada..e47341b3 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -1,15 +1,22 @@
 Changelog
 =========
 
+v2.3.2 (2024-12-25)
+-------------------
+Release note: `v2.3.2 `_
+
+* Improve merging of multi-page tables in `PdfTabbyReader`.
+* Stop parsing after client disconnection (for API usage, see `issue 488 `_).
+
 v2.3.1 (2024-11-15)
 -------------------
 Release note: `v2.3.1 `_
 
-* Fix bug with bold lines in `DocxReader` (see `issue 479 `_)
-* Upgraded requirements.txt (beautifulsoup4 to 4.12.3 version)
-* Added support for external grobid (added support parameter "Authorization")
-* Added GOST (Russian government standard) frame recognition in `PdfTabbyReader` (`need_gost_frame_analysis` parameter)
-* Update documentation (added GOST frame recognition)
+* Fix bug with bold lines in `DocxReader` (see `issue 479 `_).
+* Upgraded requirements.txt (beautifulsoup4 to 4.12.3 version).
+* Added support for external grobid (added support of env variable `GROBID_AUTH_KEY` used for "Authorization" in request header).
+* Added GOST (Russian government standard) frame recognition in `PdfTabbyReader` (`need_gost_frame_analysis` parameter).
+* Update documentation (added GOST frame recognition).
 
 v2.3 (2024-09-19)
 -----------------
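A minimal usage sketch (not part of the patch series above) of how the parameters touched by these changes can be exercised against a running dedoc API instance. The host, port and the /upload endpoint are assumptions for a default local deployment and may differ; the pdf_with_text_layer and need_pdf_table_analysis parameters and the content/tables/cells response layout are the ones referenced in the diffs above.

import requests

# Assumed address of a locally running dedoc instance; adjust host, port and endpoint to your deployment.
DEDOC_URL = "http://localhost:1231/upload"


def parse_tables(file_path: str) -> list:
    # pdf_with_text_layer="tabby" uses the existing textual layer (the docs above recommend it
    # for easier and faster table parsing); need_pdf_table_analysis enables table detection for scans.
    parameters = {"pdf_with_text_layer": "tabby", "need_pdf_table_analysis": "true"}
    with open(file_path, "rb") as file:
        response = requests.post(DEDOC_URL, files={"file": file}, data=parameters)
    response.raise_for_status()
    result = response.json()
    # Tables are returned under result["content"]["tables"], each with a "cells" matrix,
    # which is the structure the API tests above read.
    return result["content"]["tables"]


if __name__ == "__main__":
    for table in parse_tables("example_mp_table_with_repeate_header_2.pdf"):
        for row in table["cells"]:
            print(row)  # each cell is a dict; its exact fields vary between dedoc versions

With pdf_with_text_layer="tabby", tables that span several pages should be merged by PdfTabbyReader, which is the behaviour improved in v2.3.2.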