Skip to content

Commit

Permalink
Merge pull request #92 from funstory-ai/develop
Browse files: browse the repository at this point in the history
feat(progress_monitor): add configurable report interval and threading support
  • Loading branch information
awwaawwa authored Feb 13, 2025
2 parents cc10c0a + 28eef00 commit c6068cd
Show file tree
Hide file tree
Showing 18 changed files with 55 additions and 29 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ uv run babeldoc --bing --files example.pdf --files example2.pdf

- `--output`, `-o`: Output directory for translated files. If not set, the current working directory is used.
- `--debug`, `-d`: Enable debug-level logging and export detailed intermediate results to `~/.cache/yadt/working`.
- `--report-interval`: Progress report interval in seconds (default: 0.1).

### Configuration File

Expand Down
2 changes: 1 addition & 1 deletion babeldoc/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.1.6"
__version__ = "0.1.7"
2 changes: 1 addition & 1 deletion babeldoc/document_il/backend/pdf_creater.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@


class PDFCreater:
stage_name = "创建PDF文件"
stage_name = "Create PDF file"

def __init__(
self,
Expand Down
2 changes: 1 addition & 1 deletion babeldoc/document_il/frontend/il_creater.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@


class ILCreater:
stage_name = "解析PDF并创建中间表示"
stage_name = "Parse PDF and Create Intermediate Representation"

def __init__(self, translation_config: TranslationConfig):
self.progress = None
Expand Down
2 changes: 1 addition & 1 deletion babeldoc/document_il/midend/add_debug_information.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@


class AddDebugInformation:
stage_name = "添加DEBUG信息"
stage_name = "Add Debug Information"

def __init__(self, translation_config: TranslationConfig):
self.translation_config = translation_config
Expand Down
2 changes: 1 addition & 1 deletion babeldoc/document_il/midend/il_translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def set_output(self, output: str):


class ILTranslator:
stage_name = "翻译段落"
stage_name = "Translate Paragraphs"

def __init__(
self,
Expand Down
2 changes: 1 addition & 1 deletion babeldoc/document_il/midend/layout_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@


class LayoutParser:
stage_name = "解析页面布局"
stage_name = "Parse Page Layout"

def __init__(self, translation_config: TranslationConfig):
self.translation_config = translation_config
Expand Down
2 changes: 1 addition & 1 deletion babeldoc/document_il/midend/paragraph_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def generate_base58_id(length: int = 5) -> str:


class ParagraphFinder:
stage_name = "解析段落"
stage_name = "Parse Paragraphs"

def __init__(self, translation_config: TranslationConfig):
self.translation_config = translation_config
Expand Down
2 changes: 1 addition & 1 deletion babeldoc/document_il/midend/remove_descent.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@


class RemoveDescent:
stage_name = "移除字体下沉"
stage_name = "Remove Char Descent"

def __init__(self, translation_config: TranslationConfig):
self.translation_config = translation_config
Expand Down
2 changes: 1 addition & 1 deletion babeldoc/document_il/midend/styles_and_formulas.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@


class StylesAndFormulas:
stage_name = "解析公式与样式"
stage_name = "Parse Formulas and Styles"

def __init__(self, translation_config: TranslationConfig):
self.translation_config = translation_config
Expand Down
2 changes: 1 addition & 1 deletion babeldoc/document_il/midend/typesetting.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ def render(self) -> [PdfCharacter]:


class Typesetting:
stage_name = "排版"
stage_name = "Typesetting"

def __init__(self, translation_config: TranslationConfig):
self.font_mapper = FontMapper(translation_config)
Expand Down
2 changes: 1 addition & 1 deletion babeldoc/document_il/utils/fontmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@


class FontMapper:
stage_name = "添加字体"
stage_name = "Add Fonts"

def __init__(self, translation_config: TranslationConfig):
self.font_names = [
Expand Down
1 change: 1 addition & 0 deletions babeldoc/high_level.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,7 @@ async def async_translate(translation_config: TranslationConfig):
finish_event=finish_event,
cancel_event=cancel_event,
loop=loop,
report_interval=translation_config.report_interval,
) as pm:
future = loop.run_in_executor(None, do_translate, pm, translation_config)
try:
Expand Down
9 changes: 8 additions & 1 deletion babeldoc/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from babeldoc.translation_config import TranslationConfig

logger = logging.getLogger(__name__)
__version__ = "0.1.6"
__version__ = "0.1.7"


def create_parser():
Expand Down Expand Up @@ -170,6 +170,12 @@ def create_parser():
action="store_true",
help="Enable all compatibility enhancement options (equivalent to --skip-clean --dual-translate-first --disable-rich-text-translate)",
)
translation_params.add_argument(
"--report-interval",
type=float,
default=0.1,
help="Progress report interval in seconds (default: 0.1)",
)
service_params = translation_params.add_mutually_exclusive_group()
service_params.add_argument(
"--openai",
Expand Down Expand Up @@ -414,6 +420,7 @@ async def main():
dual_translate_first=args.dual_translate_first,
disable_rich_text_translate=args.disable_rich_text_translate,
enhance_compatibility=args.enhance_compatibility,
report_interval=args.report_interval,
)

# Create progress handler
Expand Down
43 changes: 29 additions & 14 deletions babeldoc/progress_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,26 @@ def __init__(
self.progress_change_callback = progress_change_callback
self.finish_callback = finish_callback
self.report_interval = report_interval
logger.debug(f"report_interval: {self.report_interval}")
self.last_report_time = 0
self.finish_stage_count = 0
self.finish_event = finish_event
self.cancel_event = cancel_event
self.loop = loop
if finish_event and not loop:
raise ValueError("finish_event requires a loop")
if self.progress_change_callback:
self.progress_change_callback(
type="stage_summary",
stages=[
{
"name": name,
"percent": 1.0 / len(stages),
}
for name in stages
],
)
self.lock = threading.Lock()

def stage_start(self, stage_name: str, total: int):
stage = self.stage[stage_name]
Expand All @@ -49,6 +62,7 @@ def stage_start(self, stage_name: str, total: int):
stage_current=0,
stage_total=total,
)
self.last_report_time = 0.0
return stage

def __enter__(self):
Expand All @@ -66,7 +80,7 @@ def on_finish(self):
self.finish_callback(type="error", error=CancelledError)

def stage_done(self, stage):
self.last_report_time = 0
self.last_report_time = 0.0
self.finish_stage_count += 1
if (
stage.current != stage.total
Expand Down Expand Up @@ -94,19 +108,20 @@ def calculate_current_progress(self, stage=None):
return progress

def stage_update(self, stage, n: int):
if (
self.progress_change_callback
and time.time() - self.last_report_time > self.report_interval
):
self.progress_change_callback(
type="progress_update",
stage=stage.display_name,
stage_progress=stage.current * 100 / stage.total,
stage_current=stage.current,
stage_total=stage.total,
overall_progress=self.calculate_current_progress(stage),
)
self.last_report_time = time.time()
with self.lock:
report_time_delta = time.time() - self.last_report_time
if report_time_delta < self.report_interval:
return
if self.progress_change_callback:
self.progress_change_callback(
type="progress_update",
stage=stage.display_name,
stage_progress=stage.current * 100 / stage.total,
stage_current=stage.current,
stage_total=stage.total,
overall_progress=self.calculate_current_progress(stage),
)
self.last_report_time = time.time()

def translate_done(self, translate_result):
if self.finish_callback:
Expand Down
2 changes: 2 additions & 0 deletions babeldoc/translation_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def __init__(
dual_translate_first: bool = False,
disable_rich_text_translate: bool = False, # 是否禁用富文本翻译
enhance_compatibility: bool = False, # 增强兼容性模式
report_interval: float = 0.1, # Progress report interval in seconds
):
self.input_file = input_file
self.translator = translator
Expand All @@ -58,6 +59,7 @@ def __init__(
self.disable_rich_text_translate = (
disable_rich_text_translate or enhance_compatibility
)
self.report_interval = report_interval
if progress_monitor:
if progress_monitor.cancel_event is None:
progress_monitor.cancel_event = threading.Event()
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "BabelDOC"
version = "0.1.6"
version = "0.1.7"
description = "Yet Another Document Translator"
license = "AGPL-3.0"
readme = "README.md"
Expand Down Expand Up @@ -117,7 +117,7 @@ dev = [
]

[bumpver]
current_version = "0.1.6"
current_version = "0.1.7"
version_pattern = "MAJOR.MINOR.PATCH[.PYTAGNUM]"

[bumpver.file_patterns]
Expand Down
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit c6068cd

Please sign in to comment.