From 96bec550ff44100e5ff88a2b034453acb8cedeef Mon Sep 17 00:00:00 2001 From: awwaawwa <8493196+awwaawwa@users.noreply.github.com> Date: Mon, 10 Feb 2025 17:59:15 +0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=94=A7=20fix(paragraph=5Ffinder):=20Skip?= =?UTF-8?q?=20paragraphs=20with=20debug=20information?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Enhance paragraph filtering logic to skip paragraphs with debug information - Prevent processing of paragraphs containing debug-related character styles - Improve paragraph composition detection and filtering --- yadt/document_il/midend/add_debug_information.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yadt/document_il/midend/add_debug_information.py b/yadt/document_il/midend/add_debug_information.py index 6f2c810..543dbb6 100644 --- a/yadt/document_il/midend/add_debug_information.py +++ b/yadt/document_il/midend/add_debug_information.py @@ -80,6 +80,8 @@ def process_page(self, page: il_version_1.Page): new_paragraphs = [] for paragraph in page.pdf_paragraph: + if not paragraph.pdf_paragraph_composition: + continue if any((x.pdf_same_style_unicode_characters.debug_info for x in paragraph.pdf_paragraph_composition if x.pdf_same_style_unicode_characters)): continue # Create a rectangle box