Skip to content

Commit

Permalink
project-sブランチをmainブランチにマージ (#1029)
Browse files Browse the repository at this point in the history
  • Loading branch information
Hiroshiba authored Jan 27, 2024
2 parents 0c94f68 + 2f4c1ff commit ea76515
Show file tree
Hide file tree
Showing 11 changed files with 863 additions and 17 deletions.
1 change: 1 addition & 0 deletions engine_manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"port": 50021,
"icon": "engine_manifest_assets/icon.png",
"default_sampling_rate": 24000,
"frame_rate": 93.75,
"terms_of_service": "engine_manifest_assets/terms_of_service.md",
"update_infos": "engine_manifest_assets/update_infos.json",
"dependency_licenses": "engine_manifest_assets/dependency_licenses.json",
Expand Down
65 changes: 65 additions & 0 deletions run.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,12 @@
AudioQuery,
BaseLibraryInfo,
DownloadableLibraryInfo,
FrameAudioQuery,
InstalledLibraryInfo,
MorphableTargetInfo,
ParseKanaBadRequest,
ParseKanaError,
Score,
Speaker,
SpeakerInfo,
StyleIdNotFoundError,
Expand Down Expand Up @@ -640,6 +642,69 @@ def _synthesis_morphing(
background=BackgroundTask(delete_file, f.name),
)

# POST /sing_frame_audio_query
# Builds the initial frame-level query for singing synthesis from a score.
# The request body is a `Score`; the style is selected with the `speaker`
# query parameter (exposed to the code as `style_id`).
@app.post(
    "/sing_frame_audio_query",
    response_model=FrameAudioQuery,
    tags=["クエリ作成"],
    summary="歌唱音声合成用のクエリを作成する",
)
def sing_frame_audio_query(
    score: Score,
    style_id: StyleId = Query(alias="speaker"),  # noqa: B008
    core_version: str | None = None,
) -> FrameAudioQuery:
    """
    歌唱音声合成用のクエリの初期値を得ます。ここで得られたクエリはそのまま歌唱音声合成に利用できます。各値の意味は`Schemas`を参照してください。
    """
    # NOTE: the docstring above is published verbatim as the endpoint's
    # OpenAPI description, so it is intentionally left unchanged.
    tts_engine = get_engine(core_version)
    selected_core = get_core(core_version)

    # The engine returns a (phonemes, f0, volume) triple for the score.
    sing_features = tts_engine.create_sing_phoneme_and_f0_and_volume(
        score, style_id
    )
    frame_phonemes, frame_f0, frame_volume = sing_features

    # Initial values: unit volume scale, mono output, and the sampling rate
    # reported by the selected core.
    sampling_rate = selected_core.default_sampling_rate
    return FrameAudioQuery(
        phonemes=frame_phonemes,
        f0=frame_f0,
        volume=frame_volume,
        volumeScale=1,
        outputSamplingRate=sampling_rate,
        outputStereo=False,
    )

# POST /frame_synthesis
# Synthesizes singing audio from a frame-level query and returns it as a WAV
# file. The style is selected with the `speaker` query parameter (exposed to
# the code as `style_id`).
@app.post(
    "/frame_synthesis",
    response_class=FileResponse,
    responses={
        200: {
            "content": {
                "audio/wav": {
                    "schema": {"type": "string", "format": "binary"},
                },
            },
        },
    },
    tags=["音声合成"],
)
def frame_synthesis(
    query: FrameAudioQuery,
    style_id: StyleId = Query(alias="speaker"),  # noqa: B008
    core_version: str | None = None,
) -> FileResponse:
    """
    歌唱音声合成を行います。
    """
    # NOTE: the docstring above is published verbatim as the endpoint's
    # OpenAPI description, so it is intentionally left unchanged.
    tts_engine = get_engine(core_version)
    waveform = tts_engine.frame_synthsize_wave(query, style_id)

    # delete=False keeps the file on disk after the `with` block closes it;
    # removal is deferred to the background task attached to the response.
    with NamedTemporaryFile(delete=False) as tmp_wav:
        soundfile.write(
            tmp_wav,
            waveform,
            query.outputSamplingRate,
            format="WAV",
        )

    wav_path = tmp_wav.name
    cleanup = BackgroundTask(delete_file, wav_path)
    return FileResponse(wav_path, media_type="audio/wav", background=cleanup)

@app.post(
"/connect_waves",
response_class=FileResponse,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,10 @@
"title": "依存関係のライセンス情報",
"type": "array"
},
"frame_rate": {
"title": "エンジンのフレームレート",
"type": "number"
},
"icon": {
"title": "エンジンのアイコンをBASE64エンコードしたもの",
"type": "string"
Expand Down Expand Up @@ -276,6 +280,7 @@
"url",
"icon",
"default_sampling_rate",
"frame_rate",
"terms_of_service",
"update_infos",
"dependency_licenses",
Expand All @@ -284,6 +289,73 @@
"title": "EngineManifest",
"type": "object"
},
"FrameAudioQuery": {
"description": "フレームごとの音声合成用のクエリ",
"properties": {
"f0": {
"items": {
"type": "number"
},
"title": "フレームごとの基本周波数",
"type": "array"
},
"outputSamplingRate": {
"title": "音声データの出力サンプリングレート",
"type": "integer"
},
"outputStereo": {
"title": "音声データをステレオ出力するか否か",
"type": "boolean"
},
"phonemes": {
"items": {
"$ref": "#/components/schemas/FramePhoneme"
},
"title": "音素のリスト",
"type": "array"
},
"volume": {
"items": {
"type": "number"
},
"title": "フレームごとの音量",
"type": "array"
},
"volumeScale": {
"title": "全体の音量",
"type": "number"
}
},
"required": [
"f0",
"volume",
"phonemes",
"volumeScale",
"outputSamplingRate",
"outputStereo"
],
"title": "FrameAudioQuery",
"type": "object"
},
"FramePhoneme": {
"description": "音素の情報",
"properties": {
"frame_length": {
"title": "音素のフレーム長",
"type": "integer"
},
"phoneme": {
"title": "音素",
"type": "string"
}
},
"required": [
"phoneme",
"frame_length"
],
"title": "FramePhoneme",
"type": "object"
},
"HTTPValidationError": {
"properties": {
"detail": {
Expand Down Expand Up @@ -448,6 +520,29 @@
"title": "MorphableTargetInfo",
"type": "object"
},
"Note": {
"description": "音符ごとの情報",
"properties": {
"frame_length": {
"title": "音符のフレーム長",
"type": "integer"
},
"key": {
"title": "音階",
"type": "integer"
},
"lyric": {
"title": "音符の歌詞",
"type": "string"
}
},
"required": [
"frame_length",
"lyric"
],
"title": "Note",
"type": "object"
},
"ParseKanaBadRequest": {
"properties": {
"error_args": {
Expand Down Expand Up @@ -534,6 +629,23 @@
"title": "Preset",
"type": "object"
},
"Score": {
"description": "楽譜情報",
"properties": {
"notes": {
"items": {
"$ref": "#/components/schemas/Note"
},
"title": "音符のリスト",
"type": "array"
}
},
"required": [
"notes"
],
"title": "Score",
"type": "object"
},
"Speaker": {
"description": "話者情報",
"properties": {
Expand Down Expand Up @@ -611,6 +723,15 @@
"name": {
"title": "スタイル名",
"type": "string"
},
"type": {
"enum": [
"talk",
"humming",
"sing_teacher"
],
"title": "モデルの種類",
"type": "string"
}
},
"required": [
Expand Down Expand Up @@ -1433,6 +1554,69 @@
]
}
},
"/frame_synthesis": {
"post": {
"description": "歌唱音声合成を行います。",
"operationId": "frame_synthesis_frame_synthesis_post",
"parameters": [
{
"in": "query",
"name": "speaker",
"required": true,
"schema": {
"title": "Speaker",
"type": "integer"
}
},
{
"in": "query",
"name": "core_version",
"required": false,
"schema": {
"title": "Core Version",
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/FrameAudioQuery"
}
}
},
"required": true
},
"responses": {
"200": {
"content": {
"audio/wav": {
"schema": {
"format": "binary",
"type": "string"
}
}
},
"description": "Successful Response"
},
"422": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/HTTPValidationError"
}
}
},
"description": "Validation Error"
}
},
"summary": "Frame Synthesis",
"tags": [
"音声合成"
]
}
},
"/import_user_dict": {
"post": {
"description": "他のユーザー辞書をインポートします。\n\nParameters\n----------\nimport_dict_data: dict[str, UserDictWord]\n インポートするユーザー辞書のデータ\noverride: bool\n 重複したエントリがあった場合、上書きするかどうか",
Expand Down Expand Up @@ -2066,6 +2250,68 @@
]
}
},
"/sing_frame_audio_query": {
"post": {
"description": "歌唱音声合成用のクエリの初期値を得ます。ここで得られたクエリはそのまま歌唱音声合成に利用できます。各値の意味は`Schemas`を参照してください。",
"operationId": "sing_frame_audio_query_sing_frame_audio_query_post",
"parameters": [
{
"in": "query",
"name": "speaker",
"required": true,
"schema": {
"title": "Speaker",
"type": "integer"
}
},
{
"in": "query",
"name": "core_version",
"required": false,
"schema": {
"title": "Core Version",
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Score"
}
}
},
"required": true
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/FrameAudioQuery"
}
}
},
"description": "Successful Response"
},
"422": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/HTTPValidationError"
}
}
},
"description": "Validation Error"
}
},
"summary": "歌唱音声合成用のクエリを作成する",
"tags": [
"クエリ作成"
]
}
},
"/speaker_info": {
"get": {
"description": "指定されたspeaker_uuidに関する情報をjson形式で返します。\n画像や音声はbase64エンコードされたものが返されます。\n\nReturns\n-------\nret_data: SpeakerInfo",
Expand Down
Loading

0 comments on commit ea76515

Please sign in to comment.