Skip to content

Commit

Permalink
refactor(styles_and_formulas): simplify and optimize formula character processing
Browse files Browse the repository at this point in the history

- remove redundant current_chars initialization
- update is_current_formula flag placement for clarity
- rename is_formulas_start_end_char to is_formulas_start_char
- adjust regex pattern in is_formulas_start_char to include '•'
  • Loading branch information
awwaawwa committed Jan 20, 2025
1 parent 442c5ea commit eb76bb9
Showing 1 changed file with 6 additions and 15 deletions.
21 changes: 6 additions & 15 deletions yadt/document_il/midend/styles_and_formulas.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,26 +63,17 @@ def process_page_formulas(self, page: Page):
continue

new_compositions = []
current_chars = []
is_current_formula = False # 当前是否在处理公式字符

for composition in paragraph.pdf_paragraph_composition:
if not composition.pdf_line:
if current_chars:
# 处理剩余字符
new_compositions.append(
self.create_composition(current_chars, is_current_formula)
)
current_chars = []
new_compositions.append(composition)
continue
current_chars = []
is_current_formula = False # 当前是否在处理公式字符

line = composition.pdf_line
for char in line.pdf_character:
is_formula = (
(
(
self.is_formulas_start_end_char(char.char_unicode)
self.is_formulas_start_char(char.char_unicode)
and not is_current_formula
)
or (
Expand Down Expand Up @@ -480,7 +471,7 @@ def is_formulas_font(self, font_name: str) -> bool:

return False

def is_formulas_start_end_char(self, char: str) -> bool:
def is_formulas_start_char(self, char: str) -> bool:
if "(cid:" in char:
return True
if self.translation_config.formular_char_pattern:
Expand All @@ -505,12 +496,12 @@ def is_formulas_start_end_char(self, char: str) -> bool:
)
):
return True
if re.match("[0-9\\[\\]]", char):
if re.match("[0-9\\[\\]•]", char):
return True
return False

def is_formulas_middle_char(self, char: str) -> bool:
if self.is_formulas_start_end_char(char):
if self.is_formulas_start_char(char):
return True

if re.match(",", char):
Expand Down

0 comments on commit eb76bb9

Please sign in to comment.