From 409f89f1bd4e35045923c5da8513d75437c16d4a Mon Sep 17 00:00:00 2001 From: dronperminov Date: Tue, 11 Jun 2024 16:10:00 +0300 Subject: [PATCH] TLDR-704 Add line for divide pages --- dedoc/api/api_utils.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/dedoc/api/api_utils.py b/dedoc/api/api_utils.py index c942fefa..0d8108a4 100644 --- a/dedoc/api/api_utils.py +++ b/dedoc/api/api_utils.py @@ -120,12 +120,16 @@ def json2html(text: str, attachments: Optional[List[ParsedDocument]], tabs: int = 0, table2id: Dict[str, int] = None, - attach2id: Dict[str, int] = None) -> str: + attach2id: Dict[str, int] = None, + prev_page_id: int = -1) -> str: tables = [] if tables is None else tables attachments = [] if attachments is None else attachments table2id = {table.metadata.uid: table_id for table_id, table in enumerate(tables)} if table2id is None else table2id attach2id = {attachment.metadata.uid: attachment_id for attachment_id, attachment in enumerate(attachments)} if attach2id is None else attach2id + if paragraph.metadata.page_id != prev_page_id: + text += f"
Page {paragraph.metadata.page_id + 1}
" + ptext = __annotations2html(paragraph=paragraph, table2id=table2id, attach2id=attach2id, tabs=tabs) if paragraph.metadata.hierarchy_level.line_type in [HierarchyLevel.header, HierarchyLevel.root]: @@ -141,7 +145,8 @@ def json2html(text: str, text += ptext for subparagraph in paragraph.subparagraphs: - text = json2html(text=text, paragraph=subparagraph, tables=None, attachments=None, tabs=tabs + 4, table2id=table2id, attach2id=attach2id) + text = json2html(text=text, paragraph=subparagraph, tables=None, attachments=None, tabs=tabs + 4, table2id=table2id, attach2id=attach2id, + prev_page_id=paragraph.metadata.page_id) if tables is not None and len(tables) > 0: text += "

Tables:

"