Many bug fixes and adding time offset
philpw99 committed Jan 19, 2025
1 parent 18d0031 commit fedf6a7
Showing 19 changed files with 317 additions and 226 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -9,6 +9,7 @@
*.pyc
*/__pycache__/
*.env
.*

# 测试和脚本
/test/
@@ -24,4 +25,4 @@
# 应用数据
/AppData/
/output/
/work-dir/
/work-dir/
6 changes: 6 additions & 0 deletions app/common/config.py
@@ -200,6 +200,12 @@ class Config(QConfig):
1500, RangeValidator(500, 3000)
)

time_offset = RangeConfigItem(
"Subtitle", "TimeOffset",
0,
RangeValidator(-5000, 5000)
)

# ------------------- 软件页面配置 -------------------
micaEnabled = ConfigItem("MainWindow", "MicaEnabled", False, BoolValidator())
dpiScale = OptionsConfigItem(
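The new `time_offset` item stores a subtitle time shift in milliseconds (default 0, accepted range -5000 to 5000). A minimal, hypothetical sketch of what a millisecond offset means for a single SRT timestamp; the helper name and the clamping at zero are illustrative assumptions, not code from this commit:

```python
# Hypothetical helper: shift one SRT timestamp by an offset in milliseconds.
# Clamping at zero is an assumption so negative offsets cannot produce negative
# timestamps; the commit's actual application is in transcript_thread.py below.
def shift_srt_timestamp(ts: str, offset_ms: int) -> str:
    h, m, rest = ts.split(":")
    s, ms = rest.split(",")
    total = ((int(h) * 60 + int(m)) * 60 + int(s)) * 1000 + int(ms) + offset_ms
    total = max(0, total)
    h, total = divmod(total, 3_600_000)
    m, total = divmod(total, 60_000)
    s, ms = divmod(total, 1000)
    return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"

print(shift_srt_timestamp("00:01:02,500", -750))  # -> 00:01:01,750
```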
2 changes: 1 addition & 1 deletion app/core/bk_asr/ASRData.py
@@ -479,7 +479,7 @@ def from_vtt(vtt_str: str) -> 'ASRData':
"""
segments = []
# 跳过头部元数据
content = vtt_str.split('\n\n')[2:]
content = vtt_str.split('\n\n')[1:]

timestamp_pattern = re.compile(r'(\d{2}):(\d{2}):(\d{2})\.(\d{3})\s*-->\s*(\d{2}):(\d{2}):(\d{2})\.(\d{3})')

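The one-character change matters because a WebVTT file has a single header block before the first blank line; splitting on blank lines and dropping two blocks (`[2:]`) silently discarded the first cue. A small illustration with a minimal, assumed VTT string:

```python
vtt_str = (
    "WEBVTT\n"
    "\n"
    "00:00:01.000 --> 00:00:02.000\n"
    "first cue\n"
    "\n"
    "00:00:03.000 --> 00:00:04.000\n"
    "second cue\n"
)

blocks = vtt_str.split("\n\n")
print(blocks[2:])  # old behaviour: only the second cue survives
print(blocks[1:])  # fixed: both cue blocks are kept for parsing
```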
114 changes: 60 additions & 54 deletions app/core/subtitle_processor/optimizer.py
@@ -2,7 +2,7 @@
import logging
import os
from concurrent.futures import ThreadPoolExecutor
import re
import re, json
from typing import Dict

import retry
@@ -12,7 +12,8 @@
TRANSLATE_PROMPT,
OPTIMIZER_PROMPT,
REFLECT_TRANSLATE_PROMPT,
SINGLE_TRANSLATE_PROMPT
SINGLE_TRANSLATE_PROMPT,
SINGLE_BATCH_TRANSLATE_PROMPT
)
from ..subtitle_processor.aligner import SubtitleAligner
from ..utils import json_repair
@@ -86,20 +87,29 @@ def optimizer_multi_thread(self, subtitle_json: Dict[int, str],
chunks = [dict(items[i:i + batch_num]) for i in range(0, len(items), batch_num)]

def process_chunk(chunk):
failed = False
if translate:
try:
result = self.translate(chunk, reflect)
except Exception as e:
logger.error(f"翻译失败,使用单条翻译:{e}")
result = self.translate_single(chunk)
# logger.info(f"chunk len:{len(chunk)}\n{chunk}")
failed = True
# result = self.translate_single(chunk)
else:
try:
result = self.optimize(chunk)
except Exception as e:
logger.error(f"优化失败:{e}")
result = chunk

if failed:
result = self.translate_single(chunk)
failed = False

if callback:
callback(result)
if isinstance(result, Dict):
callback(result)
return result

results = list(self.executor.map(process_chunk, chunks))
@@ -108,7 +118,7 @@ def process_chunk(chunk):
optimizer_result = {k: v for result in results for k, v in result.items()}
return optimizer_result

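The reworked `process_chunk` defers the per-line fallback: instead of calling `translate_single` inside the `except` block, it only records the failure and retries outside the `try`, and the progress callback now fires only when the chunk actually produced a dict. A condensed sketch of that pattern, with hypothetical names standing in for the class methods:

```python
# Sketch of the "batch first, fall back per line" pattern (names are illustrative).
def process_chunk(chunk, batch_translate, single_translate, callback=None):
    failed = False
    result = None
    try:
        result = batch_translate(chunk)
    except Exception as exc:
        print(f"batch translation failed, retrying line by line: {exc}")
        failed = True

    if failed:
        # Retry outside the try/except so a second failure surfaces normally.
        result = single_translate(chunk)

    if callback and isinstance(result, dict):
        callback(result)  # report progress only for usable results
    return result
```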
@retry.retry(tries=2)
@retry.retry(tries=1)
def optimize(self, original_subtitle: Dict[int, str]) -> Dict[int, str]:
""" Optimize the given subtitle. """
logger.info(f"[+]正在优化字幕:{next(iter(original_subtitle))} - {next(reversed(original_subtitle))}")
@@ -133,7 +143,7 @@ def optimize(self, original_subtitle: Dict[int, str]) -> Dict[int, str]:
self.llm_result_logger.info("===========")
return aligned_subtitle

@retry.retry(tries=2)
@retry.retry(tries=1)
def translate(self, original_subtitle: Dict[int, str], reflect=False) -> Dict[int, str]:
"""优化并翻译给定的字幕。"""
if reflect:
@@ -153,6 +163,7 @@ def _reflect_translate(self, original_subtitle: Dict[int, str]):
# print(response_content)
optimized_text = {k: v["optimized_subtitle"] for k, v in response_content.items()} # 字幕文本
aligned_subtitle = repair_subtitle(original_subtitle, optimized_text) # 修复字幕对齐问题
# print(aligned_subtitle)
# 在 translations 中查找对应的翻译 文本-翻译 映射
translations = {item["optimized_subtitle"]: item["revised_translation"] for item in response_content.values()}

@@ -212,70 +223,65 @@ def _create_optimizer_message(self, original_subtitle):
{"role": "user", "content": input_content}]
return message

def translate_single(self, original_subtitle: Dict[int, str]) -> Dict[int, str]:
def translate_single(self, original_subtitles: Dict[int, str]) -> Dict[int, str]:
"""单条字幕翻译,用于在批量翻译失败时的备选方案"""
translate_result = {}
for key, value in original_subtitle.items():
try:
message = [{"role": "system",
"content": SINGLE_TRANSLATE_PROMPT.replace("[TargetLanguage]", self.target_language)},
{"role": "user", "content": value}]
response = self.client.chat.completions.create(
model=self.model,
stream=False,
messages=message)
translate = response.choices[0].message.content.replace("\n", "")
original_text = self.remove_punctuation(value)
translated_text = self.remove_punctuation(translate)
translate_result[key] = f"{original_text}\n{translated_text}"
logger.info(f"单条翻译结果: {translate_result[key]}")
except Exception as e:
logger.error(f"单条翻译失败: {e}")
translate_result[key] = f"{value}\n "
# logger.info(f"org sub:{original_subtitle}")
for key, value in original_subtitles.items():
# try:
message = [{"role": "system",
"content": SINGLE_TRANSLATE_PROMPT.replace("[TargetLanguage]", self.target_language)},
{"role": "user", "content": [value]}]
response = self.client.chat.completions.create(
model=self.model,
stream=False,
messages=message)
logger.info(f"response: {response}\n")
translate = response.choices[0].message.content.replace("\n", "")
original_text = self.remove_punctuation(value)
translated_text = self.remove_punctuation(translate)
translate_result[key] = f"{original_text}\n{translated_text}"
logger.info(f"单条翻译结果: {translate_result[key]}")
# except Exception as e:
# logger.error(f"单条翻译失败: {e.with_traceback()}")
# translate_result[key] = f"{value}\n "
return translate_result

def translate_single_batch(self, original_subtitle: Dict[int,str], callback = None) -> Dict[int,str]:
"""直接大批翻译字幕"""
translate_result = {}
text = ""

i, total_lines = 1, len(original_subtitle) # line numbers starts with 1, not 0
logger.info(f"total lines:{total_lines}")
reSearch = re.compile(r'^\[\[(\d+)\]\](.*)', re.MULTILINE) # Compile it so it can run faster

"""try:"""
while i <= total_lines:
text = ""
for j in range(self.batch_num): # Do it 10 sentence at a time
text += f"\n[[{i}]]{original_subtitle[str(i)]}"
i += 1
if i > total_lines: # Reach the end
break

logger.info(f"Translating lines up to {i}")
# logger.info(text)
previous_sentence = ""
previous_translation = ""
for key, value in original_subtitle.items():

content = SINGLE_BATCH_TRANSLATE_PROMPT.replace("[TargetLanguage]", self.target_language
).replace( "[PreviousSentence]", previous_sentence
).replace( "[PreviousTranslation]", previous_translation)

# logger.info(f"prompt:{content}")

message = [{"role": "system",
"content": SINGLE_TRANSLATE_PROMPT.replace("[TargetLanguage]", self.target_language)},
{"role": "user", "content": text}]
"content": content},
{"role": "user", "content": value}]

previous_sentence = value

response = self.client.chat.completions.create(
model=self.model,
stream=False,
messages=message)
translate = response.choices[0].message.content
# logger.info("returned text:\n" + translate)
result_lines = reSearch.findall(translate)
# Add it to result dict
seg = {}
for j in range(len(result_lines)):
line:str = result_lines[j][0]
if line.isdigit():
seg[line] = result_lines[j][1]

return_text = response.choices[0].message.content
# logger.info(f"response:{type(return_text)}")
previous_translation = return_text

line = {str(key): return_text} # Create a dictionary with key and translated text

if callback:
# report the progress
callback(seg)
callback(line)

translate_result.update(seg) # Add seg to result
translate_result.update(line) # Add line to result
"""
except Exception as e:
logger.error(f"批量单句翻译失败{e}")
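`translate_single_batch` no longer sends numbered `[[n]]` blocks of ten lines; it now walks the subtitles one by one and feeds the previous sentence and its translation back into the prompt, which keeps context across cues and discourages the model from repeating itself. A condensed sketch of that loop, assuming an OpenAI-compatible `chat.completions` client and the placeholder convention from `subtitle_config.py`; the function and parameter names are illustrative, not the repository's API:

```python
# Context-carrying, one-cue-at-a-time translation loop (illustrative names).
def translate_one_by_one(client, model, prompt_template, target_language,
                         subtitles, callback=None):
    results = {}
    previous_sentence, previous_translation = "", ""
    for key, text in subtitles.items():
        system_prompt = (prompt_template
                         .replace("[TargetLanguage]", target_language)
                         .replace("[PreviousSentence]", previous_sentence)
                         .replace("[PreviousTranslation]", previous_translation))
        response = client.chat.completions.create(
            model=model,
            stream=False,
            messages=[{"role": "system", "content": system_prompt},
                      {"role": "user", "content": text}],
        )
        translation = response.choices[0].message.content
        previous_sentence, previous_translation = text, translation
        results[str(key)] = translation
        if callback:
            callback({str(key): translation})  # report progress per cue
    return results
```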
2 changes: 1 addition & 1 deletion app/core/subtitle_processor/split_by_llm.py
@@ -67,7 +67,7 @@ def split_by_llm(text: str,
model: str = "gpt-4o-mini",
use_cache: bool = False,
max_word_count_cjk: int = 18,
max_word_count_english: int = 12) -> List[str]:
max_word_count_english: int = 32) -> List[str]:
"""
包装 split_by_llm_retry 函数,确保在重试全部失败后返回空列表
"""
9 changes: 8 additions & 1 deletion app/core/subtitle_processor/subtitle_config.py
@@ -296,6 +296,13 @@
SINGLE_TRANSLATE_PROMPT = """
You are a professional [TargetLanguage] translator.
Please translate the following text into [TargetLanguage].
Strictly maintain one-to-one correspondence for each line with the translation.
Return the translation result directly without any explanation or other content.
"""

SINGLE_BATCH_TRANSLATE_PROMPT = """
You are a professional [TargetLanguage] translator.
The previous sentence is: "[PreviousSentence]"
and it was translated as: "[PreviousTranslation]".
Please translate the following text into [TargetLanguage] and don't repeat the previous translation.
Return the translation result directly without any explanation or other content.
"""
3 changes: 2 additions & 1 deletion app/core/thread/create_task_thread.py
@@ -88,7 +88,7 @@ def create_file_task(self, file_path):
if video_info.audio_codec in ["aac", "mp3", "pcm"]:
audio_format = "copy"

if cfg.subtitle_output_format.value.value == "ass" and ass_style_path.exists():
if cfg.subtitle_output_format.value.value == "ass":
ass_style_name = cfg.subtitle_style_name.value
ass_style_path = SUBTITLE_STYLE_PATH / f"{ass_style_name}.txt"
subtitle_style_srt = ass_style_path.read_text(encoding="utf-8")
@@ -353,6 +353,7 @@ def create_transcription_task(self, file_path):
original_subtitle_save_path=str(original_subtitle_save_path),
result_subtitle_save_path=str(result_subtitle_save_path),
subtitle_style_srt=subtitle_style_srt,
subtitle_layout=cfg.subtitle_layout.value,
max_word_count_cjk=cfg.max_word_count_cjk.value,
max_word_count_english=cfg.max_word_count_english.value,
# Added by Philip
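The dropped `ass_style_path.exists()` check referred to a path that, at least within the visible hunk, is only built two lines later from `cfg.subtitle_style_name`, so it could not reliably guard the read. A hedged sketch of a defensive ordering, building the path before testing it; the helper and its empty-string fallback are assumptions, not this commit's code:

```python
from pathlib import Path

def load_ass_style(style_dir: Path, style_name: str) -> str:
    """Illustrative helper: build the style path first, then guard the read."""
    ass_style_path = style_dir / f"{style_name}.txt"
    if ass_style_path.exists():
        return ass_style_path.read_text(encoding="utf-8")
    return ""  # assumed fallback: no style rather than an unhandled exception
```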
27 changes: 13 additions & 14 deletions app/core/thread/subtitle_optimization_thread.py
@@ -129,18 +129,17 @@ def run(self):
asr_data = from_subtitle_file(str_path)

# 检查是否需要合并重新断句
if need_optimize:
if not asr_data.is_word_timestamp() and need_split and self.task.faster_whisper_one_word:
asr_data.split_to_word_segments()
if asr_data.is_word_timestamp():
self.progress.emit(15, self.tr("字幕断句..."))
logger.info("正在字幕断句...")
asr_data = merge_segments(asr_data, model=llm_model,
num_threads=thread_num,
max_word_count_cjk=max_word_count_cjk,
max_word_count_english=max_word_count_english)
asr_data.save(save_path=split_path)
self.update_all.emit(asr_data.to_json())
if not asr_data.is_word_timestamp() and need_split and self.task.faster_whisper_one_word:
asr_data.split_to_word_segments()
if asr_data.is_word_timestamp():
self.progress.emit(15, self.tr("字幕断句..."))
logger.info("正在字幕断句...")
asr_data = merge_segments(asr_data, model=llm_model,
num_threads=thread_num,
max_word_count_cjk=max_word_count_cjk,
max_word_count_english=max_word_count_english)
asr_data.save(save_path=split_path)
self.update_all.emit(asr_data.to_json())

# 制作成请求llm接口的格式 {{"1": "original_subtitle"},...}
subtitle_json = {str(k): v["original_subtitle"] for k, v in asr_data.to_json().items()}
Expand Down Expand Up @@ -182,10 +181,10 @@ def run(self):
)
optimizer_result = self.optimizer.translate_single_batch(subtitle_json, callback=self.callback)

# 替换优化或者翻译后的字幕
# 加入优化或者翻译后的字幕
for i, subtitle_text in optimizer_result.items():
seg = asr_data.segments[int(i) - 1]
seg.text = subtitle_text
seg.text = seg.text + "\n" + subtitle_text

# 保存字幕
if result_subtitle_save_path.endswith(".ass"):
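The loop now appends the translation under the original line instead of overwriting it, so each cue ends up bilingual. A tiny worked example of what one segment's text looks like after the change (the values are made up):

```python
seg_text = "Hello there."      # original subtitle kept on the first line
subtitle_text = "你好。"        # translation returned by translate_single_batch
seg_text = seg_text + "\n" + subtitle_text
print(seg_text)
# Hello there.
# 你好。
```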
12 changes: 9 additions & 3 deletions app/core/thread/transcript_thread.py
@@ -162,17 +162,23 @@ def run(self):
asr_data = self.asr.run(callback=self.progress_callback)

# Check if asr_data needs to add minimum length
if cfg.subtitle_enable_sentence_minimum_time:
if cfg.subtitle_enable_sentence_minimum_time.value:
asr_data.add_minimum_len(cfg.subtitle_sentence_minimum_time.value)

# If time offset is not zero, adjust the timestamps
if cfg.time_offset.value != 0:
for seg in asr_data.segments:
seg.start_time += cfg.time_offset.value
seg.end_time += cfg.time_offset.value

# 保存字幕文件
original_subtitle_path = Path(self.task.original_subtitle_save_path)
original_subtitle_path.parent.mkdir(parents=True, exist_ok=True)
asr_data.to_srt(save_path=str(original_subtitle_path))
logger.info("源字幕文件已保存到: %s", self.task.original_subtitle_save_path)

if self.task.result_subtitle_save_path:
# Make a copy to result dir as well, if exist
if self.task.type == Task.Type.TRANSCRIBE and self.task.result_subtitle_save_path:
# Make a copy to result dir as well, if this is only a transcribe task
asr_data.save(
save_path=self.task.result_subtitle_save_path,
ass_style=self.task.subtitle_style_srt,
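Besides the new offset block, note the added `.value` on `cfg.subtitle_enable_sentence_minimum_time`: the config item object itself is always truthy, so the old check ran regardless of the stored setting. A minimal illustration of that truthiness pitfall using a stand-in class (the real ConfigItem comes from qfluentwidgets; this is only a sketch):

```python
class ConfigItem:                      # stand-in for the real config item class
    def __init__(self, value):
        self.value = value

subtitle_enable_sentence_minimum_time = ConfigItem(False)

if subtitle_enable_sentence_minimum_time:          # old check: object is truthy
    print("runs even though the setting is False")
if subtitle_enable_sentence_minimum_time.value:    # new check: reads the boolean
    print("skipped, as intended")
```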
4 changes: 2 additions & 2 deletions app/core/thread/video_synthesis_thread.py
@@ -37,10 +37,10 @@ def run(self):
logger.info(f"时间:{datetime.datetime.now()}")
self.task.status = Task.Status.SYNTHESIZING
video_file = self.task.file_path
if Path(self.task.result_subtitle_save_path).is_file():
if Path(self.task.original_subtitle_save_path).is_file():
# result sub exists (after optimizing)
subtitle_file = self.task.result_subtitle_save_path
elif Path(self.task.original_subtitle_save_path).is_file():
elif Path(self.task.result_subtitle_save_path).is_file():
# No optimizing, original sub only
subtitle_file = self.task.original_subtitle_save_path
else: