Skip to content

Commit

Permalink
Add more controls to video/sub synthesis
Browse files Browse the repository at this point in the history
  • Loading branch information
philpw99 committed Jan 21, 2025
1 parent fedf6a7 commit ce9e5e6
Show file tree
Hide file tree
Showing 18 changed files with 1,255 additions and 569 deletions.
5 changes: 5 additions & 0 deletions app/common/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,11 @@ class Config(QConfig):
0,
RangeValidator(-5000, 5000)
)

# Subtitle vertical offset setting ("Subtitle" / "VerticalOffset"): default 0,
# validated to the range [-500, 500]. Units are not stated here -- presumably
# pixels; confirm against the code that consumes it.
vertical_offset = RangeConfigItem(
    "Subtitle",
    "VerticalOffset",
    0,
    RangeValidator(-500, 500),
)

# ------------------- 软件页面配置 -------------------
micaEnabled = ConfigItem("MainWindow", "MicaEnabled", False, BoolValidator())
Expand Down
16 changes: 10 additions & 6 deletions app/common/enums.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
# Set the enums to new translated values
from PyQt5.QtCore import QObject
from ..core.entities import SubtitleLayoutEnum, InternetTranslateEnum, TodoWhenDoneEnum, Task
from ..core.entities import SubtitleLayoutEnum, InternetTranslateEnum, TodoWhenDoneEnum, Task, BatchTaskTypeEnum

def Enums_Translate():
qoEnums = QObject()
BatchTaskTypeEnum.TRANSCRIBE._value_ = qoEnums.tr("Create Subtitle from Audio/Video")
BatchTaskTypeEnum.SOFT._value_ = qoEnums.tr("Create Soft Subtitle Video")
BatchTaskTypeEnum.HARD._value_ = qoEnums.tr("Create Hard Subtitle Video")

SubtitleLayoutEnum.ONLY_ORIGINAL._value_ = qoEnums.tr("Original Only")
SubtitleLayoutEnum.ONLY_TRANSLATE._value_ = qoEnums.tr("Translated Only")
SubtitleLayoutEnum.ORIGINAL_ON_TOP._value_ = qoEnums.tr("Original on Top")
Expand Down Expand Up @@ -34,8 +38,8 @@ def Enums_Translate():
Task.Source.FILE_IMPORT._value_ = qoEnums.tr("File Import")
Task.Source.URL_IMPORT._value_ = qoEnums.tr("URL Import")

Task.Type.OPTIMIZE._value_ = qoEnums.tr("Optimize")
Task.Type.SUBTITLE._value_ = qoEnums.tr("Subtitle")
Task.Type.SYNTHESIS._value_ = qoEnums.tr("Synthesis")
Task.Type.TRANSCRIBE._value_ = qoEnums.tr("Transcribe")
Task.Type.URL._value_ = qoEnums.tr("URL Import")
Task.Type.OPTIMIZE._value_ = qoEnums.tr("Optimize + Translate Subtitles")
Task.Type.SUBTITLE._value_ = qoEnums.tr("Add Subtitle To Video")
Task.Type.SYNTHESIS._value_ = qoEnums.tr("Combine Subtitle with Video")
Task.Type.TRANSCRIBE._value_ = qoEnums.tr("Get Subtitle From Video/Audio")
Task.Type.URL._value_ = qoEnums.tr("Download Video from URL then Add Subtitle")
9 changes: 9 additions & 0 deletions app/common/signal_bus.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ class SignalBus(QObject):
# 使用网络翻译
internet_translation_changed = pyqtSignal(bool)
internet_translation_method_changed = pyqtSignal(bool)
# Video output options changed (need video / soft subtitle)
need_video_changed = pyqtSignal(bool)
soft_subtitle_changed = pyqtSignal(bool)

# 新增视频控制相关信号
video_play = pyqtSignal() # 播放信号
Expand All @@ -23,6 +26,12 @@ class SignalBus(QObject):
video_segment_play = pyqtSignal(int, int) # 播放片段信号,参数为开始和结束时间(ms)
video_subtitle_added = pyqtSignal(str) # 添加字幕文件信号

def on_need_video_changed(self, needVideo: bool):
    """Broadcast a change of the "need video" option.

    Args:
        needVideo: New flag value; forwarded unchanged through the
            ``need_video_changed`` signal.
    """
    self.need_video_changed.emit(needVideo)

def on_soft_subtitle_changed(self, softSubtitle: bool):
    """Broadcast a change of the "soft subtitle" option.

    Args:
        softSubtitle: New flag value; forwarded unchanged through the
            ``soft_subtitle_changed`` signal.
    """
    self.soft_subtitle_changed.emit(softSubtitle)

def on_subtitle_layout_changed(self, layout: str):
    """Broadcast a change of the subtitle layout.

    Args:
        layout: New layout string; forwarded unchanged through the
            ``subtitle_layout_changed`` signal.
    """
    self.subtitle_layout_changed.emit(layout)

Expand Down
34 changes: 27 additions & 7 deletions app/core/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@
from typing import Optional
from PyQt5.QtCore import QObject

class BatchTaskTypeEnum(Enum):
    """Batch task type.

    The member values are the English display strings. Note that
    ``Enums_Translate()`` (app/common/enums.py) overwrites each member's
    ``_value_`` with its translated text at runtime, so ``.value`` may not be
    the literal below once translations have been applied; compare members by
    identity/name rather than by value.
    """

    TRANSCRIBE = "Create Subtitle from Audio/Video"
    SOFT = "Create Soft Subtitle Video"
    HARD = "Create Hard Subtitle Video"

class SubtitleLayoutEnum(Enum):
""" 字幕布局 """
ONLY_ORIGINAL = "Original Only"
Expand All @@ -19,6 +25,15 @@ class InternetTranslateEnum(Enum):
GOOGLE = "Google Translate"


class SupportedImageFormats(Enum):
    """Supported image formats.

    Each value is the lowercase file extension without a leading dot.
    """

    JPG = "jpg"
    PNG = "png"
    BMP = "bmp"
    GIF = "gif"
    WEBP = "webp"


class SupportedAudioFormats(Enum):
""" 支持的音频格式 """
AAC = "aac"
Expand Down Expand Up @@ -492,11 +507,11 @@ class Source(Enum):

class Type(Enum):
# 任务类型:transcribe or generate subtitle
TRANSCRIBE = "Transcription"
SUBTITLE = "Subtitle"
OPTIMIZE = "Optimization"
SYNTHESIS = "Synthesis"
URL = "URL"
TRANSCRIBE = "Get Subtitle From Video/Audio"
SUBTITLE = "Add Subtitle To Video"
OPTIMIZE = "Optimize + Translate Subtitles"
SYNTHESIS = "Combine Subtitle with Video"
URL = "Download Video from URL then Add Subtitle"

# 任务信息
id: int = field(default_factory=lambda: randint(0, 100_000_000))
Expand All @@ -522,7 +537,6 @@ class Type(Enum):

# 转录(转录模型)
transcribe_model: Optional[TranscribeModelEnum] = TranscribeModelEnum.JIANYING

transcribe_language: Optional[TranscribeLanguageEnum] = LANGUAGES[TranscribeLanguageEnum.ENGLISH.value]
use_asr_cache: bool = True
need_word_time_stamp: bool = False
Expand Down Expand Up @@ -559,10 +573,16 @@ class Type(Enum):
subtitle_layout: Optional[str] = None
max_word_count_cjk: int = 12
max_word_count_english: int = 18
need_split: bool = True
need_split: bool = False

# 视频生成
need_video: bool = True
video_save_path: Optional[str] = None
soft_subtitle: bool = True
subtitle_style_srt: Optional[str] = None
portrait: bool = False
portrait_background: Optional[str] = None
zoom_video: int = 100
zoom_subtitle: int = 100
vertical_offset: int = 0

2 changes: 2 additions & 0 deletions app/core/subtitle_processor/optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,8 @@ def translate_single_batch(self, original_subtitle: Dict[int,str], callback = No
return_text = response.choices[0].message.content
# logger.info(f"response:{type(return_text)}")
previous_translation = return_text

logger.info(f"{key}. Original: {value}\n{key}. Translated: {return_text}")

line = {str(key): return_text} # Create a dictionary with key and translated text

Expand Down
37 changes: 19 additions & 18 deletions app/core/thread/create_task_thread.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,31 +21,28 @@ class CreateTaskThread(QThread):
progress = pyqtSignal(int, str)
error = pyqtSignal(str)

def __init__(self, file_path, task_type: Task.Type):
def __init__(self, file_path, task_type: Task.Type, soft_sub: bool):
super().__init__()
self.file_path = file_path
self.task_type = task_type
self.soft_sub = soft_sub

def run(self):
try:
if self.task_type == Task.Type.SUBTITLE:
self.create_file_task(self.file_path)
self.create_file_task(self.file_path, self.soft_sub)
elif self.task_type == Task.Type.URL:
self.create_url_task(self.file_path)
self.create_url_task(self.file_path, self.soft_sub)
elif self.task_type == Task.Type.TRANSCRIBE:
self.create_transcription_task(self.file_path)
elif self.task_type == Task.Type.OPTIMIZE:
self.create_subtitle_optimization_task()
elif self.task_type == Task.Type.SYNTHESIS:
self.create_video_synthesis_task()
else:
raise ValueError("No matching task type.")
except Exception as e:
logger.exception("创建任务失败: %s", str(e))
self.progress.emit(0, self.tr("创建任务失败"))
self.error.emit(str(e))

def create_file_task(self, file_path):
def create_file_task(self, file_path, soft_sub: bool):
logger.info("\n===================")
logger.info(f"开始创建文件任务:{file_path}")
# 使用 Path 对象处理路径
Expand Down Expand Up @@ -91,7 +88,8 @@ def create_file_task(self, file_path):
if cfg.subtitle_output_format.value.value == "ass":
ass_style_name = cfg.subtitle_style_name.value
ass_style_path = SUBTITLE_STYLE_PATH / f"{ass_style_name}.txt"
subtitle_style_srt = ass_style_path.read_text(encoding="utf-8")
if ass_style_path.exists():
subtitle_style_srt = ass_style_path.read_text(encoding="utf-8")
else:
subtitle_style_srt = None

Expand Down Expand Up @@ -149,16 +147,17 @@ def create_file_task(self, file_path):
result_subtitle_save_path=str(result_subtitle_save_path),
subtitle_layout=cfg.subtitle_layout.value,
video_save_path=str(video_save_path),
soft_subtitle=cfg.soft_subtitle.value,
soft_subtitle=soft_sub,
subtitle_style_srt=subtitle_style_srt,
need_video=cfg.need_video.value,
vertical_offset=cfg.vertical_offset.value,
type=Task.Type.SUBTITLE,
)
self.finished.emit(task)
self.progress.emit(100, self.tr("创建任务完成"))
logger.info(f"文件任务创建完成:{task}")

def create_url_task(self, url, task_type):
def create_url_task(self, url, soft_sub: bool):
logger.info("\n===================")
logger.info(f"开始创建URL任务:{url}")
self.progress.emit(5, self.tr("正在获取视频信息"))
Expand Down Expand Up @@ -213,7 +212,8 @@ def create_url_task(self, url, task_type):
if cfg.subtitle_output_format.value.value == "ass" and ass_style_path.exists():
ass_style_name = cfg.subtitle_style_name.value
ass_style_path = SUBTITLE_STYLE_PATH / f"{ass_style_name}.txt"
subtitle_style_srt = ass_style_path.read_text(encoding="utf-8")
if ass_style_path.exists():
subtitle_style_srt = ass_style_path.read_text(encoding="utf-8")
else:
subtitle_style_srt = None

Expand Down Expand Up @@ -266,9 +266,10 @@ def create_url_task(self, url, task_type):
result_subtitle_save_path=str(result_subtitle_save_path),
subtitle_layout=cfg.subtitle_layout.value,
video_save_path=str(video_save_path),
soft_subtitle=cfg.soft_subtitle.value,
soft_subtitle=soft_sub,
subtitle_style_srt=subtitle_style_srt,
need_video=cfg.need_video.value,
vertical_offset=cfg.vertical_offset.value,
task=Task.Type.SUBTITLE,
)
self.finished.emit(task)
Expand Down Expand Up @@ -307,10 +308,11 @@ def create_transcription_task(self, file_path):
original_subtitle_save_path = task_work_dir / f"【原始字幕】{file_name}-{cfg.transcribe_model.value.value}-{whisper_type}.srt"
result_subtitle_save_path = file_full_path.parent / ( cfg.subtitle_file_prefix.value + file_name + cfg.subtitle_file_suffix.value + "." + cfg.subtitle_output_format.value.value )

if cfg.subtitle_output_format.value.value == "ass" and ass_style_path.exists():
if cfg.subtitle_output_format.value.value == "ass":
ass_style_name = cfg.subtitle_style_name.value
ass_style_path = SUBTITLE_STYLE_PATH / f"{ass_style_name}.txt"
subtitle_style_srt = ass_style_path.read_text(encoding="utf-8")
if ass_style_path.exists():
subtitle_style_srt = ass_style_path.read_text(encoding="utf-8")
else:
subtitle_style_srt = None

Expand Down Expand Up @@ -414,7 +416,7 @@ def create_subtitle_optimization_task(file_path):
logger.info(f"字幕优化任务创建完成:{task}")
return task

def create_video_synthesis_task(subtitle_file, video_file):
def create_video_synthesis_task(subtitle_file, video_file, soft_sub: bool):
logger.info(f"开始创建视频合成任务:{subtitle_file} {video_file}")
subtitle_file = Path(subtitle_file.strip()).as_posix()
video_file = Path(video_file.strip()).as_posix()
Expand All @@ -430,7 +432,7 @@ def create_video_synthesis_task(subtitle_file, video_file):
status=Task.Status.GENERATING,
work_dir=str(task_work_dir),
file_path=str(Path(video_file)),
result_subtitle_save_path=str(Path(subtitle_file)),
original_subtitle_save_path=str(Path(subtitle_file)),
video_save_path=str(video_save_path),
soft_subtitle=cfg.soft_subtitle.value,
type=Task.Type.SYNTHESIS,
Expand Down Expand Up @@ -500,7 +502,6 @@ def sanitize_filename(name, replacement="_"):
# 如果文件名为空,返回一个默认名称
if not sanitized:
sanitized = "default_filename"

return sanitized


Expand Down
7 changes: 5 additions & 2 deletions app/core/thread/subtitle_optimization_thread.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,10 @@ def run(self):
asr_data = from_subtitle_file(str_path)

# 检查是否需要合并重新断句
if not asr_data.is_word_timestamp() and need_split and self.task.faster_whisper_one_word:
is_word_split = asr_data.is_word_timestamp()
if not is_word_split and need_split and self.task.faster_whisper_one_word:
asr_data.split_to_word_segments()
if asr_data.is_word_timestamp():
if is_word_split:
self.progress.emit(15, self.tr("字幕断句..."))
logger.info("正在字幕断句...")
asr_data = merge_segments(asr_data, model=llm_model,
Expand All @@ -141,6 +142,8 @@ def run(self):
asr_data.save(save_path=split_path)
self.update_all.emit(asr_data.to_json())



# 制作成请求llm接口的格式 {{"1": "original_subtitle"},...}
subtitle_json = {str(k): v["original_subtitle"] for k, v in asr_data.to_json().items()}
self.subtitle_length = len(subtitle_json)
Expand Down
17 changes: 9 additions & 8 deletions app/core/thread/subtitle_pipeline_thread.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,15 +65,16 @@ def handle_error(error_msg):

# 3. 视频合成
# self.task.status = Task.Status.GENERATING
self.progress.emit(80, self.tr("开始合成视频"))
synthesis_thread = VideoSynthesisThread(self.task)
synthesis_thread.progress.connect(lambda value, msg: self.progress.emit(int(70 + value * 0.3), msg))
synthesis_thread.error.connect(handle_error)
synthesis_thread.run()
if self.task.need_video:
self.progress.emit(80, self.tr("开始合成视频"))
synthesis_thread = VideoSynthesisThread(self.task)
synthesis_thread.progress.connect(lambda value, msg: self.progress.emit(int(70 + value * 0.3), msg))
synthesis_thread.error.connect(handle_error)
synthesis_thread.run()

if self.has_error:
logger.info("视频合成过程中发生错误,终止流程")
return
if self.has_error:
logger.info("视频合成过程中发生错误,终止流程")
return

self.task.status = Task.Status.COMPLETED
logger.info("处理完成")
Expand Down
35 changes: 29 additions & 6 deletions app/core/thread/video_synthesis_thread.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
from pathlib import Path
from PyQt5.QtCore import QThread, pyqtSignal

from ..entities import Task
from ..utils.video_utils import add_subtitles
from ..entities import Task, VideoInfo
from ..utils.video_utils import add_subtitles, get_video_info
from ..utils.logger import setup_logger
from ...common.config import cfg

Expand Down Expand Up @@ -37,12 +37,12 @@ def run(self):
logger.info(f"时间:{datetime.datetime.now()}")
self.task.status = Task.Status.SYNTHESIZING
video_file = self.task.file_path
if Path(self.task.original_subtitle_save_path).is_file():
if self.task.original_subtitle_save_path and Path(self.task.original_subtitle_save_path).is_file():
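# Result sub exists (after optimizing). NOTE(review): the condition above tests original_subtitle_save_path, not the result path -- confirm which file is intended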
subtitle_file = self.task.result_subtitle_save_path
elif Path(self.task.result_subtitle_save_path).is_file():
# No optimzing, original sub only
subtitle_file = self.task.original_subtitle_save_path
elif self.task.result_subtitle_save_path and Path(self.task.result_subtitle_save_path).is_file():
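# No optimizing. NOTE(review): labelled "original sub only", but this branch checks and uses result_subtitle_save_path -- confirm which file is intended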
subtitle_file = self.task.result_subtitle_save_path
else:
raise RuntimeError("No subtitle file available.")

Expand All @@ -61,7 +61,30 @@ def run(self):
logger.info(f"开始合成视频: {video_file}")
self.progress.emit(10, self.tr("正在合成"))
self.progress.emit(11, f"Soft subtitle:{soft_subtitle}")
if not self.task.video_info:
video_info = get_video_info(video_file)
w = video_info["width"]
h = video_info["height"]
duration = int(video_info["duration_seconds"])
else:
w = self.task.video_info.width
h = self.task.video_info.height
duration = int(self.task.video_info.duration_seconds)

if self.task.portrait:
width = h
height = w
else:
width = w
height = h

add_subtitles(video_file, subtitle_file, video_save_path, soft_subtitle=soft_subtitle,
output_width=width, output_height=height, portrait=self.task.portrait,
vertical_offset=self.task.vertical_offset,
portrait_background=self.task.portrait_background,
duration=duration,
zoom_video=self.task.zoom_video,
zoom_subtitle=self.task.zoom_subtitle,
progress_callback=self.progress_callback)
self.progress.emit(100, self.tr("合成完成"))
logger.info(f"视频合成完成,保存路径: {video_save_path}")
Expand Down
Loading

0 comments on commit ce9e5e6

Please sign in to comment.