Many bug fixes and adding time offset
philpw99 committed Jan 19, 2025
1 parent 18d0031 commit fedf6a7
Showing 19 changed files with 317 additions and 226 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -9,6 +9,7 @@
*.pyc
*/__pycache__/
*.env
.*

# 测试和脚本
/test/
@@ -24,4 +25,4 @@
# 应用数据
/AppData/
/output/
/work-dir/
/work-dir/
6 changes: 6 additions & 0 deletions app/common/config.py
@@ -200,6 +200,12 @@ class Config(QConfig):
1500, RangeValidator(500, 3000)
)

time_offset = RangeConfigItem(
"Subtitle", "TimeOffset",
0,
RangeValidator(-5000, 5000)
)

# ------------------- 软件页面配置 -------------------
micaEnabled = ConfigItem("MainWindow", "MicaEnabled", False, BoolValidator())
dpiScale = OptionsConfigItem(
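The new `time_offset` item stores a subtitle time shift in milliseconds (default 0, accepted range -5000 to 5000). A minimal, hypothetical sketch of what a millisecond offset means for a single SRT timestamp; the helper name and the clamping at zero are illustrative assumptions, not code from this commit:

```python
# Hypothetical helper: shift one SRT timestamp by an offset in milliseconds.
# Clamping at zero is an assumption so negative offsets cannot produce negative
# timestamps; the commit's actual application is in transcript_thread.py below.
def shift_srt_timestamp(ts: str, offset_ms: int) -> str:
    h, m, rest = ts.split(":")
    s, ms = rest.split(",")
    total = ((int(h) * 60 + int(m)) * 60 + int(s)) * 1000 + int(ms) + offset_ms
    total = max(0, total)
    h, total = divmod(total, 3_600_000)
    m, total = divmod(total, 60_000)
    s, ms = divmod(total, 1000)
    return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"

print(shift_srt_timestamp("00:01:02,500", -750))  # -> 00:01:01,750
```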
2 changes: 1 addition & 1 deletion app/core/bk_asr/ASRData.py
@@ -479,7 +479,7 @@ def from_vtt(vtt_str: str) -> 'ASRData':
"""
segments = []
# 跳过头部元数据
content = vtt_str.split('\n\n')[2:]
content = vtt_str.split('\n\n')[1:]

timestamp_pattern = re.compile(r'(\d{2}):(\d{2}):(\d{2})\.(\d{3})\s*-->\s*(\d{2}):(\d{2}):(\d{2})\.(\d{3})')

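The one-character change matters because a WebVTT file has a single header block before the first blank line; splitting on blank lines and dropping two blocks (`[2:]`) silently discarded the first cue. A small illustration with a minimal, assumed VTT string:

```python
vtt_str = (
    "WEBVTT\n"
    "\n"
    "00:00:01.000 --> 00:00:02.000\n"
    "first cue\n"
    "\n"
    "00:00:03.000 --> 00:00:04.000\n"
    "second cue\n"
)

blocks = vtt_str.split("\n\n")
print(blocks[2:])  # old behaviour: only the second cue survives
print(blocks[1:])  # fixed: both cue blocks are kept for parsing
```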
114 changes: 60 additions & 54 deletions app/core/subtitle_processor/optimizer.py
@@ -2,7 +2,7 @@
import logging
import os
from concurrent.futures import ThreadPoolExecutor
import re
import re, json
from typing import Dict

import retry
@@ -12,7 +12,8 @@
TRANSLATE_PROMPT,
OPTIMIZER_PROMPT,
REFLECT_TRANSLATE_PROMPT,
SINGLE_TRANSLATE_PROMPT
SINGLE_TRANSLATE_PROMPT,
SINGLE_BATCH_TRANSLATE_PROMPT
)
from ..subtitle_processor.aligner import SubtitleAligner
from ..utils import json_repair
@@ -86,20 +87,29 @@ def optimizer_multi_thread(self, subtitle_json: Dict[int, str],
chunks = [dict(items[i:i + batch_num]) for i in range(0, len(items), batch_num)]

def process_chunk(chunk):
failed = False
if translate:
try:
result = self.translate(chunk, reflect)
except Exception as e:
logger.error(f"翻译失败,使用单条翻译:{e}")
result = self.translate_single(chunk)
# logger.info(f"chunk len:{len(chunk)}\n{chunk}")
failed = True
# result = self.translate_single(chunk)
else:
try:
result = self.optimize(chunk)
except Exception as e:
logger.error(f"优化失败:{e}")
result = chunk

if failed:
result = self.translate_single(chunk)
failed = False

if callback:
callback(result)
if isinstance(result, Dict):
callback(result)
return result

results = list(self.executor.map(process_chunk, chunks))
@@ -108,7 +118,7 @@ def process_chunk(chunk):
optimizer_result = {k: v for result in results for k, v in result.items()}
return optimizer_result

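The reworked `process_chunk` defers the per-line fallback: instead of calling `translate_single` inside the `except` block, it only records the failure and retries outside the `try`, and the progress callback now fires only when the chunk actually produced a dict. A condensed sketch of that pattern, with hypothetical names standing in for the class methods:

```python
# Sketch of the "batch first, fall back per line" pattern (names are illustrative).
def process_chunk(chunk, batch_translate, single_translate, callback=None):
    failed = False
    result = None
    try:
        result = batch_translate(chunk)
    except Exception as exc:
        print(f"batch translation failed, retrying line by line: {exc}")
        failed = True

    if failed:
        # Retry outside the try/except so a second failure surfaces normally.
        result = single_translate(chunk)

    if callback and isinstance(result, dict):
        callback(result)  # report progress only for usable results
    return result
```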
@retry.retry(tries=2)
@retry.retry(tries=1)
def optimize(self, original_subtitle: Dict[int, str]) -> Dict[int, str]:
""" Optimize the given subtitle. """
logger.info(f"[+]正在优化字幕:{next(iter(original_subtitle))} - {next(reversed(original_subtitle))}")
@@ -133,7 +143,7 @@ def optimize(self, original_subtitle: Dict[int, str]) -> Dict[int, str]:
self.llm_result_logger.info("===========")
return aligned_subtitle

@retry.retry(tries=2)
@retry.retry(tries=1)
def translate(self, original_subtitle: Dict[int, str], reflect=False) -> Dict[int, str]:
"""优化并翻译给定的字幕。"""
if reflect:
@@ -153,6 +163,7 @@ def _reflect_translate(self, original_subtitle: Dict[int, str]):
# print(response_content)
optimized_text = {k: v["optimized_subtitle"] for k, v in response_content.items()} # 字幕文本
aligned_subtitle = repair_subtitle(original_subtitle, optimized_text) # 修复字幕对齐问题
# print(aligned_subtitle)
# 在 translations 中查找对应的翻译 文本-翻译 映射
translations = {item["optimized_subtitle"]: item["revised_translation"] for item in response_content.values()}

@@ -212,70 +223,65 @@ def _create_optimizer_message(self, original_subtitle):
{"role": "user", "content": input_content}]
return message

def translate_single(self, original_subtitle: Dict[int, str]) -> Dict[int, str]:
def translate_single(self, original_subtitles: Dict[int, str]) -> Dict[int, str]:
"""单条字幕翻译,用于在批量翻译失败时的备选方案"""
translate_result = {}
for key, value in original_subtitle.items():
try:
message = [{"role": "system",
"content": SINGLE_TRANSLATE_PROMPT.replace("[TargetLanguage]", self.target_language)},
{"role": "user", "content": value}]
response = self.client.chat.completions.create(
model=self.model,
stream=False,
messages=message)
translate = response.choices[0].message.content.replace("\n", "")
original_text = self.remove_punctuation(value)
translated_text = self.remove_punctuation(translate)
translate_result[key] = f"{original_text}\n{translated_text}"
logger.info(f"单条翻译结果: {translate_result[key]}")
except Exception as e:
logger.error(f"单条翻译失败: {e}")
translate_result[key] = f"{value}\n "
# logger.info(f"org sub:{original_subtitle}")
for key, value in original_subtitles.items():
# try:
message = [{"role": "system",
"content": SINGLE_TRANSLATE_PROMPT.replace("[TargetLanguage]", self.target_language)},
{"role": "user", "content": [value]}]
response = self.client.chat.completions.create(
model=self.model,
stream=False,
messages=message)
logger.info(f"response: {response}\n")
translate = response.choices[0].message.content.replace("\n", "")
original_text = self.remove_punctuation(value)
translated_text = self.remove_punctuation(translate)
translate_result[key] = f"{original_text}\n{translated_text}"
logger.info(f"单条翻译结果: {translate_result[key]}")
# except Exception as e:
# logger.error(f"单条翻译失败: {e.with_traceback()}")
# translate_result[key] = f"{value}\n "
return translate_result

def translate_single_batch(self, original_subtitle: Dict[int,str], callback = None) -> Dict[int,str]:
"""直接大批翻译字幕"""
translate_result = {}
text = ""

i, total_lines = 1, len(original_subtitle) # line numbers starts with 1, not 0
logger.info(f"total lines:{total_lines}")
reSearch = re.compile(r'^\[\[(\d+)\]\](.*)', re.MULTILINE) # Compile it so it can run faster

"""try:"""
while i <= total_lines:
text = ""
for j in range(self.batch_num): # Do it 10 sentence at a time
text += f"\n[[{i}]]{original_subtitle[str(i)]}"
i += 1
if i > total_lines: # Reach the end
break

logger.info(f"Translating lines up to {i}")
# logger.info(text)
previous_sentence = ""
previous_translation = ""
for key, value in original_subtitle.items():

content = SINGLE_BATCH_TRANSLATE_PROMPT.replace("[TargetLanguage]", self.target_language
).replace( "[PreviousSentence]", previous_sentence
).replace( "[PreviousTranslation]", previous_translation)

# logger.info(f"prompt:{content}")

message = [{"role": "system",
"content": SINGLE_TRANSLATE_PROMPT.replace("[TargetLanguage]", self.target_language)},
{"role": "user", "content": text}]
"content": content},
{"role": "user", "content": value}]

previous_sentence = value

response = self.client.chat.completions.create(
model=self.model,
stream=False,
messages=message)
translate = response.choices[0].message.content
# logger.info("returned text:\n" + translate)
result_lines = reSearch.findall(translate)
# Add it to result dict
seg = {}
for j in range(len(result_lines)):
line:str = result_lines[j][0]
if line.isdigit():
seg[line] = result_lines[j][1]

return_text = response.choices[0].message.content
# logger.info(f"response:{type(return_text)}")
previous_translation = return_text

line = {str(key): return_text} # Create a dictionary with key and translated text

if callback:
# report the progress
callback(seg)
callback(line)

translate_result.update(seg) # Add seg to result
translate_result.update(line) # Add line to result
"""
except Exception as e:
logger.error(f"批量单句翻译失败{e}")
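`translate_single_batch` no longer sends numbered `[[n]]` blocks of ten lines; it now walks the subtitles one by one and feeds the previous sentence and its translation back into the prompt, which keeps context across cues and discourages the model from repeating itself. A condensed sketch of that loop, assuming an OpenAI-compatible `chat.completions` client and the placeholder convention from `subtitle_config.py`; the function and parameter names are illustrative, not the repository's API:

```python
# Context-carrying, one-cue-at-a-time translation loop (illustrative names).
def translate_one_by_one(client, model, prompt_template, target_language,
                         subtitles, callback=None):
    results = {}
    previous_sentence, previous_translation = "", ""
    for key, text in subtitles.items():
        system_prompt = (prompt_template
                         .replace("[TargetLanguage]", target_language)
                         .replace("[PreviousSentence]", previous_sentence)
                         .replace("[PreviousTranslation]", previous_translation))
        response = client.chat.completions.create(
            model=model,
            stream=False,
            messages=[{"role": "system", "content": system_prompt},
                      {"role": "user", "content": text}],
        )
        translation = response.choices[0].message.content
        previous_sentence, previous_translation = text, translation
        results[str(key)] = translation
        if callback:
            callback({str(key): translation})  # report progress per cue
    return results
```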
2 changes: 1 addition & 1 deletion app/core/subtitle_processor/split_by_llm.py
@@ -67,7 +67,7 @@ def split_by_llm(text: str,
model: str = "gpt-4o-mini",
use_cache: bool = False,
max_word_count_cjk: int = 18,
max_word_count_english: int = 12) -> List[str]:
max_word_count_english: int = 32) -> List[str]:
"""
包装 split_by_llm_retry 函数,确保在重试全部失败后返回空列表
"""
9 changes: 8 additions & 1 deletion app/core/subtitle_processor/subtitle_config.py
@@ -296,6 +296,13 @@
SINGLE_TRANSLATE_PROMPT = """
You are a professional [TargetLanguage] translator.
Please translate the following text into [TargetLanguage].
Strictly maintain one-to-one correspondence for each line with the translation.
Return the translation result directly without any explanation or other content.
"""

SINGLE_BATCH_TRANSLATE_PROMPT = """
You are a professional [TargetLanguage] translator.
The previous sentence is: "[PreviousSentence]"
and it was translated as: "[PreviousTranslation]".
Please translate the following text into [TargetLanguage] and don't repeat the previous translation.
Return the translation result directly without any explanation or other content.
"""
3 changes: 2 additions & 1 deletion app/core/thread/create_task_thread.py
@@ -88,7 +88,7 @@ def create_file_task(self, file_path):
if video_info.audio_codec in ["aac", "mp3", "pcm"]:
audio_format = "copy"

if cfg.subtitle_output_format.value.value == "ass" and ass_style_path.exists():
if cfg.subtitle_output_format.value.value == "ass":
ass_style_name = cfg.subtitle_style_name.value
ass_style_path = SUBTITLE_STYLE_PATH / f"{ass_style_name}.txt"
subtitle_style_srt = ass_style_path.read_text(encoding="utf-8")
@@ -353,6 +353,7 @@ def create_transcription_task(self, file_path):
original_subtitle_save_path=str(original_subtitle_save_path),
result_subtitle_save_path=str(result_subtitle_save_path),
subtitle_style_srt=subtitle_style_srt,
subtitle_layout=cfg.subtitle_layout.value,
max_word_count_cjk=cfg.max_word_count_cjk.value,
max_word_count_english=cfg.max_word_count_english.value,
# Added by Philip
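The dropped `ass_style_path.exists()` check referred to a path that, at least within the visible hunk, is only built two lines later from `cfg.subtitle_style_name`, so it could not reliably guard the read. A hedged sketch of a defensive ordering, building the path before testing it; the helper and its empty-string fallback are assumptions, not this commit's code:

```python
from pathlib import Path

def load_ass_style(style_dir: Path, style_name: str) -> str:
    """Illustrative helper: build the style path first, then guard the read."""
    ass_style_path = style_dir / f"{style_name}.txt"
    if ass_style_path.exists():
        return ass_style_path.read_text(encoding="utf-8")
    return ""  # assumed fallback: no style rather than an unhandled exception
```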
27 changes: 13 additions & 14 deletions app/core/thread/subtitle_optimization_thread.py
@@ -129,18 +129,17 @@ def run(self):
asr_data = from_subtitle_file(str_path)

# 检查是否需要合并重新断句
if need_optimize:
if not asr_data.is_word_timestamp() and need_split and self.task.faster_whisper_one_word:
asr_data.split_to_word_segments()
if asr_data.is_word_timestamp():
self.progress.emit(15, self.tr("字幕断句..."))
logger.info("正在字幕断句...")
asr_data = merge_segments(asr_data, model=llm_model,
num_threads=thread_num,
max_word_count_cjk=max_word_count_cjk,
max_word_count_english=max_word_count_english)
asr_data.save(save_path=split_path)
self.update_all.emit(asr_data.to_json())
if not asr_data.is_word_timestamp() and need_split and self.task.faster_whisper_one_word:
asr_data.split_to_word_segments()
if asr_data.is_word_timestamp():
self.progress.emit(15, self.tr("字幕断句..."))
logger.info("正在字幕断句...")
asr_data = merge_segments(asr_data, model=llm_model,
num_threads=thread_num,
max_word_count_cjk=max_word_count_cjk,
max_word_count_english=max_word_count_english)
asr_data.save(save_path=split_path)
self.update_all.emit(asr_data.to_json())

# 制作成请求llm接口的格式 {{"1": "original_subtitle"},...}
subtitle_json = {str(k): v["original_subtitle"] for k, v in asr_data.to_json().items()}
Expand Down Expand Up @@ -182,10 +181,10 @@ def run(self):
)
optimizer_result = self.optimizer.translate_single_batch(subtitle_json, callback=self.callback)

# 替换优化或者翻译后的字幕
# 加入优化或者翻译后的字幕
for i, subtitle_text in optimizer_result.items():
seg = asr_data.segments[int(i) - 1]
seg.text = subtitle_text
seg.text = seg.text + "\n" + subtitle_text

# 保存字幕
if result_subtitle_save_path.endswith(".ass"):
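The loop now appends the translation under the original line instead of overwriting it, so each cue ends up bilingual. A tiny worked example of what one segment's text looks like after the change (the values are made up):

```python
seg_text = "Hello there."      # original subtitle kept on the first line
subtitle_text = "你好。"        # translation returned by translate_single_batch
seg_text = seg_text + "\n" + subtitle_text
print(seg_text)
# Hello there.
# 你好。
```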
12 changes: 9 additions & 3 deletions app/core/thread/transcript_thread.py
@@ -162,17 +162,23 @@ def run(self):
asr_data = self.asr.run(callback=self.progress_callback)

# Check if asr_data needs to add minimum length
if cfg.subtitle_enable_sentence_minimum_time:
if cfg.subtitle_enable_sentence_minimum_time.value:
asr_data.add_minimum_len(cfg.subtitle_sentence_minimum_time.value)

# If time offset is not zero, adjust the timestamps
if cfg.time_offset.value != 0:
for seg in asr_data.segments:
seg.start_time += cfg.time_offset.value
seg.end_time += cfg.time_offset.value

# 保存字幕文件
original_subtitle_path = Path(self.task.original_subtitle_save_path)
original_subtitle_path.parent.mkdir(parents=True, exist_ok=True)
asr_data.to_srt(save_path=str(original_subtitle_path))
logger.info("源字幕文件已保存到: %s", self.task.original_subtitle_save_path)

if self.task.result_subtitle_save_path:
# Make a copy to result dir as well, if exist
if self.task.type == Task.Type.TRANSCRIBE and self.task.result_subtitle_save_path:
# Make a copy to result dir as well, if this is only a transcribe task
asr_data.save(
save_path=self.task.result_subtitle_save_path,
ass_style=self.task.subtitle_style_srt,
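Besides the new offset block, note the added `.value` on `cfg.subtitle_enable_sentence_minimum_time`: the config item object itself is always truthy, so the old check ran regardless of the stored setting. A minimal illustration of that truthiness pitfall using a stand-in class (the real ConfigItem comes from qfluentwidgets; this is only a sketch):

```python
class ConfigItem:                      # stand-in for the real config item class
    def __init__(self, value):
        self.value = value

subtitle_enable_sentence_minimum_time = ConfigItem(False)

if subtitle_enable_sentence_minimum_time:          # old check: object is truthy
    print("runs even though the setting is False")
if subtitle_enable_sentence_minimum_time.value:    # new check: reads the boolean
    print("skipped, as intended")
```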
4 changes: 2 additions & 2 deletions app/core/thread/video_synthesis_thread.py
@@ -37,10 +37,10 @@ def run(self):
logger.info(f"时间:{datetime.datetime.now()}")
self.task.status = Task.Status.SYNTHESIZING
video_file = self.task.file_path
if Path(self.task.result_subtitle_save_path).is_file():
if Path(self.task.original_subtitle_save_path).is_file():
# result sub exists (after optimizing)
subtitle_file = self.task.result_subtitle_save_path
elif Path(self.task.original_subtitle_save_path).is_file():
elif Path(self.task.result_subtitle_save_path).is_file():
# No optimizing, original sub only
subtitle_file = self.task.original_subtitle_save_path
else: