From b99738545ff0c7cd3da22ddfef6c0b7f0196b128 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sat, 9 Dec 2023 15:01:03 +0900
Subject: [PATCH 001/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20docstring?=
 =?UTF-8?q?=E8=BF=BD=E5=8A=A0=20(#817)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 test/test_synthesis_engine.py                 |  51 ++++---
 voicevox_engine/full_context_label.py         |  11 ++
 voicevox_engine/kana_parser.py                |  80 +++++++++--
 .../synthesis_engine/synthesis_engine_base.py | 127 ++++++++++++++----
 4 files changed, 220 insertions(+), 49 deletions(-)

diff --git a/test/test_synthesis_engine.py b/test/test_synthesis_engine.py
index f9bfa2078..e155c2649 100644
--- a/test/test_synthesis_engine.py
+++ b/test/test_synthesis_engine.py
@@ -723,9 +723,14 @@ def result_value(i: int):
         self.assertEqual(result, true_result)
 
     def synthesis_test_base(self, audio_query: AudioQuery):
+        # Inputs 音素長・モーラ音高の設定 & Expects 音素長・音素ID・モーラ音高の記録
+        #     Inputs
+        #         `audio_query`: 子音長0.1秒/母音長0.1秒/モーラ音高ランダム
+        #     Expects
+        #         `phoneme_length_list`: 音素長系列
+        #         `phoneme_id_list`: 音素ID系列
+        #         `f0_list`: モーラ音高系列
         accent_phrases = audio_query.accent_phrases
-
-        # decode forwardのために適当にpitchとlengthを設定し、リストで持っておく
         phoneme_length_list = [0.0]
         phoneme_id_list = [0]
         f0_list = [0.0]
@@ -750,42 +755,46 @@ def synthesis_test_base(self, audio_query: AudioQuery):
         phoneme_length_list.append(0.0)
         phoneme_id_list.append(0)
         f0_list.append(0.0)
-
         phoneme_length_list[0] = audio_query.prePhonemeLength
         phoneme_length_list[-1] = audio_query.postPhonemeLength
 
+        # Expects: speedScale適用
         for i in range(len(phoneme_length_list)):
             phoneme_length_list[i] /= audio_query.speedScale
 
+        # Outputs: MockCore入りSynthesisEngine の `.synthesis` 出力および core.decode_forward 引数
         result = self.synthesis_engine.synthesis(query=audio_query, style_id=1)
-
-        # decodeに渡される値の検証
         decode_args = self.decode_mock.call_args[1]
         list_length = decode_args["length"]
+
+        # Test: フレーム長
         self.assertEqual(
             list_length,
             int(sum([round(p * 24000 / 256) for p in phoneme_length_list])),
         )
 
+        # Expects: Apply/Convert/Rescale
         num_phoneme = OjtPhoneme.num_phoneme
         # mora_phoneme_listのPhoneme ID版
         mora_phoneme_id_list = [OjtPhoneme(p).phoneme_id for p in mora_phoneme_list]
 
-        # numpy.repeatをfor文でやる
-        f0 = []
-        phoneme = []
+        f0 = []  # フレームごとの音高系列
+        phoneme = []  # フレームごとの音素onehotベクトル系列
         f0_index = 0
         mean_f0 = []
         for i, phoneme_length in enumerate(phoneme_length_list):
+            # Expects: pitchScale適用
             f0_single = numpy.array(f0_list[f0_index], dtype=numpy.float32) * (
                 2**audio_query.pitchScale
             )
+            # Expects: フレームスケール化
             for _ in range(int(round(phoneme_length * (24000 / 256)))):
                 f0.append([f0_single])
+                # Expects: 音素onehot化
                 phoneme_s = []
                 for _ in range(num_phoneme):
                     phoneme_s.append(0)
-                # one hot
+                # Expects: 音素フレームスケール化
                 phoneme_s[phoneme_id_list[i]] = 1
                 phoneme.append(phoneme_s)
             # consonantとvowelを判別し、vowelであればf0_indexを一つ進める
@@ -793,44 +802,56 @@ def synthesis_test_base(self, audio_query: AudioQuery):
                 if f0_single > 0:
                     mean_f0.append(f0_single)
                 f0_index += 1
-
+        # Expects: 抑揚スケール適用
         mean_f0 = numpy.array(mean_f0, dtype=numpy.float32).mean()
         f0 = numpy.array(f0, dtype=numpy.float32)
         for i in range(len(f0)):
             if f0[i][0] != 0.0:
                 f0[i][0] = (f0[i][0] - mean_f0) * audio_query.intonationScale + mean_f0
-
         phoneme = numpy.array(phoneme, dtype=numpy.float32)
 
+        assert_f0_count = 0
+
+        # Outputs: decode_forward `f0` 引数
+        decode_f0 = decode_args["f0"]
+
+        # Test: フレームごとの音高系列
         # 乱数の影響で数値の位置がずれが生じるので、大半(4/5)があっていればよしとする
         # また、上の部分のint(round(phoneme_length * (24000 / 256)))の影響で
         # 本来のf0/phonemeとテスト生成したf0/phonemeの長さが変わることがあり、
         # テスト生成したものが若干長くなることがあるので、本来のものの長さを基準にassertする
-        assert_f0_count = 0
-        decode_f0 = decode_args["f0"]
         for i in range(len(decode_f0)):
             # 乱数の影響等で数値にずれが生じるので、10の-5乗までの近似値であれば許容する
             assert_f0_count += math.isclose(f0[i][0], decode_f0[i][0], rel_tol=10e-5)
         self.assertTrue(assert_f0_count >= int(len(decode_f0) / 5) * 4)
+
         assert_phoneme_count = 0
+
+        # Outputs: decode_forward `phoneme` 引数
         decode_phoneme = decode_args["phoneme"]
+
+        # Test: フレームごとの音素系列
         for i in range(len(decode_phoneme)):
             assert_true_count = 0
             for j in range(len(decode_phoneme[i])):
                 assert_true_count += bool(phoneme[i][j] == decode_phoneme[i][j])
             assert_phoneme_count += assert_true_count == num_phoneme
+
         self.assertTrue(assert_phoneme_count >= int(len(decode_phoneme) / 5) * 4)
+
+        # Test: スタイルID
         self.assertEqual(decode_args["style_id"], 1)
 
-        # decode forwarderのmockを使う
+        # Expects: waveform (by mock)
         true_result = decode_mock(list_length, num_phoneme, f0, phoneme, 1)
-
+        # Expects: 音量スケール適用
         true_result *= audio_query.volumeScale
 
         # TODO: resampyの部分は値の検証しようがないので、パスする
         if audio_query.outputSamplingRate != 24000:
             return
 
+        # Test:
         assert_result_count = 0
         for i in range(len(true_result)):
             if audio_query.outputStereo:
diff --git a/voicevox_engine/full_context_label.py b/voicevox_engine/full_context_label.py
index 894a56751..5ca599276 100644
--- a/voicevox_engine/full_context_label.py
+++ b/voicevox_engine/full_context_label.py
@@ -519,6 +519,17 @@ def labels(self):
 
 
 def extract_full_context_label(text: str):
+    """
+    日本語テキストから発話クラスを抽出
+    Parameters
+    ----------
+    text : str
+        日本語テキスト
+    Returns
+    -------
+    utterance : Utterance
+        発話
+    """
     labels = pyopenjtalk.extract_fullcontext(text)
     phonemes = [Phoneme.from_label(label=label) for label in labels]
     utterance = Utterance.from_phonemes(phonemes)
diff --git a/voicevox_engine/kana_parser.py b/voicevox_engine/kana_parser.py
index 8e0ff845a..14efb4672 100644
--- a/voicevox_engine/kana_parser.py
+++ b/voicevox_engine/kana_parser.py
@@ -1,15 +1,23 @@
+"""
+「AquesTalk風記法」を実装した AquesTalk風記法テキスト <-> アクセント句系列 変換。
+記法定義: `https://github.com/VOICEVOX/voicevox_engine/blob/master/README.md#読み方を-aquestalk風記法で取得修正するサンプルコード` # noqa
+"""
+
 from typing import List, Optional
 
 from .model import AccentPhrase, Mora, ParseKanaError, ParseKanaErrorCode
 from .mora_list import openjtalk_text2mora
 
 _LOOP_LIMIT = 300
-_UNVOICE_SYMBOL = "_"
-_ACCENT_SYMBOL = "'"
-_NOPAUSE_DELIMITER = "/"
-_PAUSE_DELIMITER = "、"
-_WIDE_INTERROGATION_MARK = "？"
 
+# AquesTalk風記法特殊文字
+_UNVOICE_SYMBOL = "_"  # 無声化
+_ACCENT_SYMBOL = "'"  # アクセント位置
+_NOPAUSE_DELIMITER = "/"  # ポーズ無しアクセント句境界
+_PAUSE_DELIMITER = "、"  # ポーズ有りアクセント句境界
+_WIDE_INTERROGATION_MARK = "？"  # 疑問形
+
+# AquesTalk風記法とモーラの対応（音素長・音高 0 初期化、疑問形 off 初期化）
 _text2mora_with_unvoice = {}
 for text, (consonant, vowel) in openjtalk_text2mora.items():
     _text2mora_with_unvoice[text] = Mora(
@@ -22,6 +30,8 @@
         is_interrogative=False,
     )
     if vowel in ["a", "i", "u", "e", "o"]:
+        # 手前に`_`を入れると無声化
+        # 例: "_ホ" -> "hO"
         _text2mora_with_unvoice[_UNVOICE_SYMBOL + text] = Mora(
             text=text,
             consonant=consonant if len(consonant) > 0 else None,
@@ -35,9 +45,19 @@
 
 def _text_to_accent_phrase(phrase: str) -> AccentPhrase:
     """
-    longest matchにより読み仮名からAccentPhraseを生成
-    入力長Nに対し計算量O(N^2)
+    単一アクセント句に相当するAquesTalk風記法テキストからアクセント句オブジェクトを生成
+    longest matchによりモーラ化。入力長Nに対し計算量O(N^2)。
+    Parameters
+    ----------
+    phrase : str
+        単一アクセント句に相当するAquesTalk風記法テキスト
+    Returns
+    -------
+    accent_phrase : AccentPhrase
+        アクセント句
     """
+    # NOTE: ポーズと疑問形はこの関数内で処理しない
+
     accent_index: Optional[int] = None
     moras: List[Mora] = []
 
@@ -48,24 +68,33 @@ def _text_to_accent_phrase(phrase: str) -> AccentPhrase:
     outer_loop = 0
     while base_index < len(phrase):
         outer_loop += 1
+
+        # `'`の手前がアクセント位置
         if phrase[base_index] == _ACCENT_SYMBOL:
             if len(moras) == 0:
                 raise ParseKanaError(ParseKanaErrorCode.ACCENT_TOP, text=phrase)
+            # すでにアクセント位置がある場合はエラー
             if accent_index is not None:
                 raise ParseKanaError(ParseKanaErrorCode.ACCENT_TWICE, text=phrase)
             accent_index = len(moras)
             base_index += 1
             continue
+
+        # モーラ探索
+        # より長い要素からなるモーラが見つかれば上書き（longest match）
+        # 例: phrase "キャ" -> "キ" 検出 -> "キャ" 検出/上書き -> Mora("キャ")
         for watch_index in range(base_index, len(phrase)):
+            # アクセント位置特殊文字が来たら探索打ち切り
             if phrase[watch_index] == _ACCENT_SYMBOL:
                 break
-            # 普通の文字の場合
             stack += phrase[watch_index]
             if stack in _text2mora_with_unvoice:
+                # より長い要素からなるモーラが見つかれば上書き（longest match）
+                # 例: phrase "キャ" -> "キ" 検出 -> "キャ" 検出/上書き -> Mora("キャ")
                 matched_text = stack
-        # push mora
         if matched_text is None:
             raise ParseKanaError(ParseKanaErrorCode.UNKNOWN_TEXT, text=stack)
+        # push mora
         else:
             moras.append(_text2mora_with_unvoice[matched_text].copy(deep=True))
             base_index += len(matched_text)
@@ -81,7 +110,15 @@ def _text_to_accent_phrase(phrase: str) -> AccentPhrase:
 
 def parse_kana(text: str) -> List[AccentPhrase]:
     """
-    AquesTalk風記法テキストをパースして音長・音高未指定のaccent phraseに変換
+    AquesTalk風記法テキストからアクセント句系列を生成
+    Parameters
+    ----------
+    text : str
+        AquesTalk風記法テキスト
+    Returns
+    -------
+    parsed_results : List[AccentPhrase]
+        アクセント句（音素長・モーラ音高 0初期化）系列
     """
 
     parsed_results: List[AccentPhrase] = []
@@ -90,6 +127,7 @@ def parse_kana(text: str) -> List[AccentPhrase]:
         raise ParseKanaError(ParseKanaErrorCode.EMPTY_PHRASE, position=1)
 
     for i in range(len(text) + 1):
+        # アクセント句境界（`/`か`、`）の出現までインデックス進展
         if i == len(text) or text[i] in [_PAUSE_DELIMITER, _NOPAUSE_DELIMITER]:
             phrase = text[phrase_base:i]
             if len(phrase) == 0:
@@ -99,15 +137,19 @@ def parse_kana(text: str) -> List[AccentPhrase]:
                 )
             phrase_base = i + 1
 
+            # アクセント句末に`？`で疑問文
             is_interrogative = _WIDE_INTERROGATION_MARK in phrase
             if is_interrogative:
                 if _WIDE_INTERROGATION_MARK in phrase[:-1]:
                     raise ParseKanaError(
                         ParseKanaErrorCode.INTERROGATION_MARK_NOT_AT_END, text=phrase
                     )
+                # 疑問形はモーラでなくアクセント句属性で表現
                 phrase = phrase.replace(_WIDE_INTERROGATION_MARK, "")
 
             accent_phrase: AccentPhrase = _text_to_accent_phrase(phrase)
+
+            # `、`で無音区間を挿入
             if i < len(text) and text[i] == _PAUSE_DELIMITER:
                 accent_phrase.pause_mora = Mora(
                     text="、",
@@ -125,22 +167,38 @@ def parse_kana(text: str) -> List[AccentPhrase]:
 
 
 def create_kana(accent_phrases: List[AccentPhrase]) -> str:
+    """
+    アクセント句系列からAquesTalk風記法テキストを生成
+    Parameters
+    ----------
+    accent_phrases : List[AccentPhrase]
+        アクセント句系列
+    Returns
+    -------
+    text : str
+        AquesTalk風記法テキスト
+    """
     text = ""
+    # アクセント句を先頭から逐次パースし、`text`末尾にAquesTalk風記法の文字を都度追加（ループ）
     for i, phrase in enumerate(accent_phrases):
         for j, mora in enumerate(phrase.moras):
+            # Rule3: "カナの手前に`_`を入れるとそのカナは無声化される"
             if mora.vowel in ["A", "I", "U", "E", "O"]:
                 text += _UNVOICE_SYMBOL
-
             text += mora.text
+            # `'`でアクセント位置
             if j + 1 == phrase.accent:
                 text += _ACCENT_SYMBOL
 
+        # Rule5: "アクセント句末に`？`(全角)を入れることにより疑問文の発音ができる"
         if phrase.is_interrogative:
             text += _WIDE_INTERROGATION_MARK
 
         if i < len(accent_phrases) - 1:
             if phrase.pause_mora is None:
+                # アクセント句区切り
                 text += _NOPAUSE_DELIMITER
             else:
+                # 無音でアクセント句区切り
                 text += _PAUSE_DELIMITER
     return text
diff --git a/voicevox_engine/synthesis_engine/synthesis_engine_base.py b/voicevox_engine/synthesis_engine/synthesis_engine_base.py
index fde453574..6a139a830 100644
--- a/voicevox_engine/synthesis_engine/synthesis_engine_base.py
+++ b/voicevox_engine/synthesis_engine/synthesis_engine_base.py
@@ -11,6 +11,16 @@
 
 
 def mora_to_text(mora: str) -> str:
+    """
+    Parameters
+    ----------
+    mora : str
+        モーラ音素文字列
+    Returns
+    -------
+    mora : str
+        モーラ音素文字列
+    """
     if mora[-1:] in ["A", "I", "U", "E", "O"]:
         # 無声化母音を小文字に
         mora = mora[:-1] + mora[-1].lower()
@@ -24,10 +34,18 @@ def adjust_interrogative_accent_phrases(
     accent_phrases: List[AccentPhrase],
 ) -> List[AccentPhrase]:
     """
-    enable_interrogative_upspeakが有効になっていて与えられたaccent_phrasesに疑問系のものがあった場合、
-    各accent_phraseの末尾にある疑問系発音用のMoraに対して直前のMoraより少し音を高くすることで疑問文ぽくする
-    NOTE: リファクタリング時に適切な場所へ移動させること
+    アクセント句系列を必要に応じて疑問形に補正
+    各accent_phraseの末尾のモーラより少し音の高い有声母音モーラを付与することで疑問文ぽくする
+    Parameters
+    ----------
+    accent_phrases : List[AccentPhrase]
+        アクセント句系列
+    Returns
+    -------
+    accent_phrases : List[AccentPhrase]
+        必要に応じて疑問形補正されたアクセント句系列
     """
+    # NOTE: リファクタリング時に適切な場所へ移動させること
     return [
         AccentPhrase(
             moras=adjust_interrogative_moras(accent_phrase),
@@ -40,7 +58,19 @@ def adjust_interrogative_accent_phrases(
 
 
 def adjust_interrogative_moras(accent_phrase: AccentPhrase) -> List[Mora]:
+    """
+    アクセント句に含まれるモーラ系列の必要に応じた疑問形補正
+    Parameters
+    ----------
+    accent_phrase : AccentPhrase
+        アクセント句
+    Returns
+    -------
+    moras : List[Mora]
+        補正済みモーラ系列
+    """
     moras = copy.deepcopy(accent_phrase.moras)
+    # 疑問形補正条件: 疑問形フラグON & 終端有声母音
     if accent_phrase.is_interrogative and not (len(moras) == 0 or moras[-1].pitch == 0):
         interrogative_mora = make_interrogative_mora(moras[-1])
         moras.append(interrogative_mora)
@@ -50,6 +80,17 @@ def adjust_interrogative_moras(accent_phrase: AccentPhrase) -> List[Mora]:
 
 
 def make_interrogative_mora(last_mora: Mora) -> Mora:
+    """
+    疑問形用のモーラ（同一母音・継続長 0.15秒・音高↑）の生成
+    Parameters
+    ----------
+    last_mora : Mora
+        疑問形にするモーラ
+    Returns
+    -------
+    mora : Mora
+        疑問形用のモーラ
+    """
     fix_vowel_length = 0.15
     adjust_pitch = 0.3
     max_pitch = 6.5
@@ -66,6 +107,17 @@ def make_interrogative_mora(last_mora: Mora) -> Mora:
 def full_context_label_moras_to_moras(
     full_context_moras: List[full_context_label.Mora],
 ) -> List[Mora]:
+    """
+    Moraクラスのキャスト (`full_context_label.Mora` -> `Mora`)
+    Parameters
+    ----------
+    full_context_moras : List[full_context_label.Mora]
+        モーラ系列
+    Returns
+    -------
+    moras : List[Mora]
+        モーラ系列。音素長・モーラ音高は 0 初期化
+    """
     return [
         Mora(
             text=mora_to_text("".join([p.phoneme for p in mora.phonemes])),
@@ -85,25 +137,30 @@ class SynthesisEngineBase(metaclass=ABCMeta):
     def default_sampling_rate(self) -> int:
         raise NotImplementedError
 
-    # FIXME: jsonではなくModelを返すようにする
     @property
     @abstractmethod
     def speakers(self) -> str:
+        """話者情報（json文字列）"""
+        # FIXME: jsonではなくModelを返すようにする
         raise NotImplementedError
 
     @property
     @abstractmethod
     def supported_devices(self) -> Optional[str]:
+        """
+        デバイス対応情報
+        Returns
+        -------
+            対応デバイス一覧（None: 情報取得不可）
+        """
         raise NotImplementedError
 
     def initialize_style_id_synthesis(  # noqa: B027
-        self,
-        style_id: int,
-        skip_reinit: bool,
+        self, style_id: int, skip_reinit: bool
     ):
         """
-        指定したスタイルでの音声合成を初期化する。何度も実行可能。
-        未実装の場合は何もしない
+        指定したスタイルでの音声合成を初期化する。
+        何度も実行可能。未実装の場合は何もしない。
         Parameters
         ----------
         style_id : int
@@ -132,62 +189,86 @@ def replace_phoneme_length(
         self, accent_phrases: List[AccentPhrase], style_id: int
     ) -> List[AccentPhrase]:
         """
-        accent_phrasesの母音・子音の長さを設定する
+        音素長の更新
         Parameters
         ----------
         accent_phrases : List[AccentPhrase]
-            アクセント句モデルのリスト
+            アクセント句系列
         style_id : int
             スタイルID
         Returns
         -------
         accent_phrases : List[AccentPhrase]
-            母音・子音の長さが設定されたアクセント句モデルのリスト
+            音素長が更新されたアクセント句系列
         """
         raise NotImplementedError()
 
     @abstractmethod
     def replace_mora_pitch(
-        self,
-        accent_phrases: List[AccentPhrase],
-        style_id: int,
+        self, accent_phrases: List[AccentPhrase], style_id: int
     ) -> List[AccentPhrase]:
         """
-        accent_phrasesの音高(ピッチ)を設定する
+        モーラ音高の更新
         Parameters
         ----------
         accent_phrases : List[AccentPhrase]
-            アクセント句モデルのリスト
+            アクセント句系列
         style_id : int
             スタイルID
         Returns
         -------
         accent_phrases : List[AccentPhrase]
-            音高(ピッチ)が設定されたアクセント句モデルのリスト
+            モーラ音高が更新されたアクセント句系列
         """
         raise NotImplementedError()
 
     def replace_mora_data(
-        self,
-        accent_phrases: List[AccentPhrase],
-        style_id: int,
+        self, accent_phrases: List[AccentPhrase], style_id: int
     ) -> List[AccentPhrase]:
+        """
+        音素長・モーラ音高の更新
+        Parameters
+        ----------
+        accent_phrases : List[AccentPhrase]
+            アクセント句系列
+        style_id : int
+            スタイルID
+        Returns
+        -------
+        accent_phrases : List[AccentPhrase]
+            アクセント句系列
+        """
         return self.replace_mora_pitch(
             accent_phrases=self.replace_phoneme_length(
-                accent_phrases=accent_phrases,
-                style_id=style_id,
+                accent_phrases=accent_phrases, style_id=style_id
             ),
             style_id=style_id,
         )
 
     def create_accent_phrases(self, text: str, style_id: int) -> List[AccentPhrase]:
+        """
+        テキストからアクセント句系列を生成。
+        音素長やモーラ音高も更新。
+        Parameters
+        ----------
+        text : str
+            日本語テキスト
+        style_id : int
+            スタイルID
+        Returns
+        -------
+        accent_phrases : List[AccentPhrase]
+            アクセント句系列
+        """
         if len(text.strip()) == 0:
             return []
 
+        # 音素とアクセントの推定
         utterance = extract_full_context_label(text)
         if len(utterance.breath_groups) == 0:
             return []
 
+        # Utterance -> List[AccentPhrase] のキャスト & 音素長・モーラ音高の推定と更新
         accent_phrases = self.replace_mora_data(
             accent_phrases=[
                 AccentPhrase(

From 5e63e3660985b423fedd9aee969835720a8eddd5 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sat, 9 Dec 2023 18:03:20 +0900
Subject: [PATCH 002/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`PresetManager`?=
 =?UTF-8?q?=20=E5=85=B1=E9=80=9A=E5=87=A6=E7=90=86=E3=81=AE=E9=96=A2?=
 =?UTF-8?q?=E6=95=B0=E5=8C=96=E3=83=BB=E8=BF=BD=E5=8A=A0=E3=82=B3=E3=83=A1?=
 =?UTF-8?q?=E3=83=B3=E3=83=88=20(#832)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 voicevox_engine/preset/PresetManager.py | 114 ++++++++++++------------
 1 file changed, 56 insertions(+), 58 deletions(-)

diff --git a/voicevox_engine/preset/PresetManager.py b/voicevox_engine/preset/PresetManager.py
index b994eb677..d06ec2b03 100644
--- a/voicevox_engine/preset/PresetManager.py
+++ b/voicevox_engine/preset/PresetManager.py
@@ -9,43 +9,53 @@
 
 
 class PresetManager:
-    def __init__(
-        self,
-        preset_path: Path,
-    ):
-        self.presets = []
+    """
+    プリセットの管理
+
+    プリセットはAudioQuery全体パラメータ（話速・音高・抑揚・音量・無音長）のデフォルト値セットである。
+    YAMLファイルをSSoTとする簡易データベース方式により、プリセットの管理をおこなう。
+    """
+
+    def __init__(self, preset_path: Path):
+        """
+        Parameters
+        ----------
+        preset_path : Path
+            プリセット情報を一元管理するYAMLファイルへのパス
+        """
+        self.presets: list[Preset] = []
         self.last_modified_time = 0
         self.preset_path = preset_path
 
-    def load_presets(self):
+    def load_presets(self) -> list[Preset]:
         """
-        プリセットのYAMLファイルを読み込む
-
+        既存プリセットの読み込み
         Returns
         -------
-        ret: List[Preset]
-            プリセットのリスト
+        ret: list[Preset]
+            読み込まれたプリセットのリスト
         """
 
-        # 設定ファイルのタイムスタンプを確認
+        # データベース更新の確認（タイムスタンプベース）
         try:
             _last_modified_time = self.preset_path.stat().st_mtime
             if _last_modified_time == self.last_modified_time:
+                # 更新無し、キャッシュを返す
                 return self.presets
         except OSError:
             raise PresetError("プリセットの設定ファイルが見つかりません")
 
+        # データベースの読み込み
         with open(self.preset_path, mode="r", encoding="utf-8") as f:
             obj = yaml.safe_load(f)
             if obj is None:
                 raise PresetError("プリセットの設定ファイルが空の内容です")
-
         try:
             _presets = parse_obj_as(List[Preset], obj)
         except ValidationError:
             raise PresetError("プリセットの設定ファイルにミスがあります")
 
-        # idが一意か確認
+        # 全idの一意性をバリデーション
         if len([preset.id for preset in _presets]) != len(
             {preset.id for preset in _presets}
         ):
@@ -53,40 +63,34 @@ def load_presets(self):
 
         self.presets = _presets
         self.last_modified_time = _last_modified_time
+
         return self.presets
 
     def add_preset(self, preset: Preset):
         """
-        YAMLファイルに新規のプリセットを追加する
-
+        新規プリセットの追加
         Parameters
         ----------
         preset : Preset
-            追加するプリセットを渡す
-
+            新規プリセット
         Returns
         -------
         ret: int
-            追加したプリセットのプリセットID
+            追加されたプリセットのID
         """
 
-        # 手動でファイルが更新されているかも知れないので、最新のYAMLファイルを読み直す
+        # データベース更新の反映
         self.load_presets()
 
-        # IDが0未満、または存在するIDなら新しいIDを決定し、配列に追加
+        # 新規プリセットID の発行。IDが0未満、または存在するIDなら新規IDを発行
         if preset.id < 0 or preset.id in {preset.id for preset in self.presets}:
             preset.id = max([preset.id for preset in self.presets]) + 1
+        # 新規プリセットの追加
         self.presets.append(preset)
 
-        # ファイルに書き込み
+        # 変更の反映。失敗時はリバート。
         try:
-            with open(self.preset_path, mode="w", encoding="utf-8") as f:
-                yaml.safe_dump(
-                    [preset.dict() for preset in self.presets],
-                    f,
-                    allow_unicode=True,
-                    sort_keys=False,
-                )
+            self._write_on_file()
         except Exception as err:
             self.presets.pop()
             if isinstance(err, FileNotFoundError):
@@ -98,23 +102,21 @@ def add_preset(self, preset: Preset):
 
     def update_preset(self, preset: Preset):
         """
-        YAMLファイルのプリセットを更新する
-
+        既存プリセットの更新
         Parameters
         ----------
         preset : Preset
-            更新するプリセットを渡す
-
+            新しい既存プリセット
         Returns
         -------
         ret: int
-            更新したプリセットのプリセットID
+            更新されたプリセットのID
         """
 
-        # 手動でファイルが更新されているかも知れないので、最新のYAMLファイルを読み直す
+        # データベース更新の反映
         self.load_presets()
 
-        # IDが存在するか探索
+        # 対象プリセットの検索
         prev_preset = (-1, None)
         for i in range(len(self.presets)):
             if self.presets[i].id == preset.id:
@@ -124,15 +126,9 @@ def update_preset(self, preset: Preset):
         else:
             raise PresetError("更新先のプリセットが存在しません")
 
-        # ファイルに書き込み
+        # 変更の反映。失敗時はリバート。
         try:
-            with open(self.preset_path, mode="w", encoding="utf-8") as f:
-                yaml.safe_dump(
-                    [preset.dict() for preset in self.presets],
-                    f,
-                    allow_unicode=True,
-                    sort_keys=False,
-                )
+            self._write_on_file()
         except Exception as err:
             if prev_preset != (-1, None):
                 self.presets[prev_preset[0]] = prev_preset[1]
@@ -145,23 +141,21 @@ def update_preset(self, preset: Preset):
 
     def delete_preset(self, id: int):
         """
-        YAMLファイルのプリセットを削除する
-
+        指定したIDのプリセットの削除
         Parameters
         ----------
         id: int
-            削除するプリセットのプリセットIDを渡す
-
+            削除対象プリセットのID
         Returns
         -------
         ret: int
-            削除したプリセットのプリセットID
+            削除されたプリセットのID
         """
 
-        # 手動でファイルが更新されているかも知れないので、最新のYAMLファイルを読み直す
+        # データベース更新の反映
         self.load_presets()
 
-        # IDが存在するか探索
+        # 対象プリセットの検索
         buf = None
         buf_index = -1
         for i in range(len(self.presets)):
@@ -172,17 +166,21 @@ def delete_preset(self, id: int):
         else:
             raise PresetError("削除対象のプリセットが存在しません")
 
-        # ファイルに書き込み
+        # 変更の反映。失敗時はリバート。
         try:
-            with open(self.preset_path, mode="w", encoding="utf-8") as f:
-                yaml.safe_dump(
-                    [preset.dict() for preset in self.presets],
-                    f,
-                    allow_unicode=True,
-                    sort_keys=False,
-                )
+            self._write_on_file()
         except FileNotFoundError:
             self.presets.insert(buf_index, buf)
             raise PresetError("プリセットの設定ファイルに書き込み失敗しました")
 
         return id
+
+    def _write_on_file(self):
+        """プリセット情報のファイル（簡易データベース）書き込み"""
+        with open(self.preset_path, mode="w", encoding="utf-8") as f:
+            yaml.safe_dump(
+                [preset.dict() for preset in self.presets],
+                f,
+                allow_unicode=True,
+                sort_keys=False,
+            )

From 154d5d01e08dd57895b776806952fe4172ed6943 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sun, 10 Dec 2023 02:04:37 +0900
Subject: [PATCH 003/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E3=82=B0?=
 =?UTF-8?q?=E3=83=AD=E3=83=BC=E3=83=90=E3=83=AB=E7=89=B9=E5=BE=B4=E9=87=8F?=
 =?UTF-8?q?=E9=81=A9=E7=94=A8=E3=81=AE=E9=96=A2=E6=95=B0=E5=8C=96=20(#819)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 test/test_synthesis_engine.py                 | 144 +++++++++++++-
 .../synthesis_engine/synthesis_engine.py      | 179 ++++++++++++++----
 2 files changed, 285 insertions(+), 38 deletions(-)

diff --git a/test/test_synthesis_engine.py b/test/test_synthesis_engine.py
index e155c2649..9ff7fb563 100644
--- a/test/test_synthesis_engine.py
+++ b/test/test_synthesis_engine.py
@@ -13,11 +13,17 @@
 
 # TODO: import from voicevox_engine.synthesis_engine.mora
 from voicevox_engine.synthesis_engine.synthesis_engine import (
+    apply_intonation_scale,
+    apply_output_sampling_rate,
+    apply_output_stereo,
+    apply_pitch_scale,
+    apply_prepost_silence,
+    apply_speed_scale,
+    apply_volume_scale,
     calc_frame_per_phoneme,
     calc_frame_phoneme,
     calc_frame_pitch,
     mora_phoneme_list,
-    pad_with_silence,
     pre_process,
     split_mora,
     to_flatten_moras,
@@ -173,8 +179,8 @@ def _gen_mora(
     )
 
 
-def test_pad_with_silence():
-    """Test `pad_with_silence`."""
+def test_apply_prepost_silence():
+    """Test `apply_prepost_silence`."""
     # Inputs
     query = _gen_query(prePhonemeLength=2 * 0.01067, postPhonemeLength=6 * 0.01067)
     moras = [
@@ -189,11 +195,139 @@ def test_pad_with_silence():
     ]
 
     # Outputs
-    moras_with_silence = pad_with_silence(moras, query)
+    moras_with_silence = apply_prepost_silence(moras, query)
 
     assert moras_with_silence == true_moras_with_silence
 
 
+def test_apply_speed_scale():
+    """Test `apply_speed_scale`."""
+    # Inputs
+    query = _gen_query(speedScale=2.0)
+    input_moras = [
+        _gen_mora("コ", "k", 2 * 0.01067, "o", 4 * 0.01067, 50.0),
+        _gen_mora("ン", None, None, "N", 4 * 0.01067, 50.0),
+        _gen_mora("、", None, None, "pau", 2 * 0.01067, 0.0),
+        _gen_mora("ヒ", "h", 2 * 0.01067, "i", 4 * 0.01067, 125.0),
+        _gen_mora("ホ", "h", 4 * 0.01067, "O", 2 * 0.01067, 0.0),
+    ]
+
+    # Expects - x2 fast
+    true_moras = [
+        _gen_mora("コ", "k", 1 * 0.01067, "o", 2 * 0.01067, 50.0),
+        _gen_mora("ン", None, None, "N", 2 * 0.01067, 50.0),
+        _gen_mora("、", None, None, "pau", 1 * 0.01067, 0.0),
+        _gen_mora("ヒ", "h", 1 * 0.01067, "i", 2 * 0.01067, 125.0),
+        _gen_mora("ホ", "h", 2 * 0.01067, "O", 1 * 0.01067, 0.0),
+    ]
+
+    # Outputs
+    moras = apply_speed_scale(input_moras, query)
+
+    assert moras == true_moras
+
+
+def test_apply_pitch_scale():
+    """Test `apply_pitch_scale`."""
+    # Inputs
+    query = _gen_query(pitchScale=2.0)
+    input_moras = [
+        _gen_mora("コ", "k", 0.0, "o", 0.0, 50.0),
+        _gen_mora("ン", None, None, "N", 0.0, 50.0),
+        _gen_mora("、", None, None, "pau", 0.0, 0.0),
+        _gen_mora("ヒ", "h", 0.0, "i", 0.0, 125.0),
+        _gen_mora("ホ", "h", 0.0, "O", 0.0, 0.0),
+    ]
+
+    # Expects - x4 value scaled
+    true_moras = [
+        _gen_mora("コ", "k", 0.0, "o", 0.0, 200.0),
+        _gen_mora("ン", None, None, "N", 0.0, 200.0),
+        _gen_mora("、", None, None, "pau", 0.0, 0.0),
+        _gen_mora("ヒ", "h", 0.0, "i", 0.0, 500.0),
+        _gen_mora("ホ", "h", 0.0, "O", 0.0, 0.0),
+    ]
+
+    # Outputs
+    moras = apply_pitch_scale(input_moras, query)
+
+    assert moras == true_moras
+
+
+def test_apply_intonation_scale():
+    """Test `apply_intonation_scale`."""
+    # Inputs
+    query = _gen_query(intonationScale=0.5)
+    input_moras = [
+        _gen_mora("コ", "k", 0.0, "o", 0.0, 200.0),
+        _gen_mora("ン", None, None, "N", 0.0, 200.0),
+        _gen_mora("、", None, None, "pau", 0.0, 0.0),
+        _gen_mora("ヒ", "h", 0.0, "i", 0.0, 500.0),
+        _gen_mora("ホ", "h", 0.0, "O", 0.0, 0.0),
+    ]
+
+    # Expects - mean=300 var x0.5 intonation scaling
+    true_moras = [
+        _gen_mora("コ", "k", 0.0, "o", 0.0, 250.0),
+        _gen_mora("ン", None, None, "N", 0.0, 250.0),
+        _gen_mora("、", None, None, "pau", 0.0, 0.0),
+        _gen_mora("ヒ", "h", 0.0, "i", 0.0, 400.0),
+        _gen_mora("ホ", "h", 0.0, "O", 0.0, 0.0),
+    ]
+
+    # Outputs
+    moras = apply_intonation_scale(input_moras, query)
+
+    assert moras == true_moras
+
+
+def test_apply_volume_scale():
+    """Test `apply_volume_scale`."""
+    # Inputs
+    query = _gen_query(volumeScale=3.0)
+    input_wave = numpy.array([0.0, 1.0, 2.0])
+
+    # Expects - x3 scale
+    true_wave = numpy.array([0.0, 3.0, 6.0])
+
+    # Outputs
+    wave = apply_volume_scale(input_wave, query)
+
+    assert numpy.allclose(wave, true_wave)
+
+
+def test_apply_output_sampling_rate():
+    """Test `apply_output_sampling_rate`."""
+    # Inputs
+    query = _gen_query(outputSamplingRate=12000)
+    input_wave = numpy.array([1.0 for _ in range(120)])
+    input_sr_wave = 24000
+
+    # Expects - half sampling rate
+    true_wave = numpy.array([1.0 for _ in range(60)])
+    assert true_wave.shape == (60,), "Prerequisites"
+
+    # Outputs
+    wave = apply_output_sampling_rate(input_wave, input_sr_wave, query)
+
+    assert wave.shape[0] == true_wave.shape[0]
+
+
+def test_apply_output_stereo():
+    """Test `apply_output_stereo`."""
+    # Inputs
+    query = _gen_query(outputStereo=True)
+    input_wave = numpy.array([1.0, 0.0, 2.0])
+
+    # Expects - Stereo :: (Time, Channel)
+    true_wave = numpy.array([[1.0, 1.0], [0.0, 0.0], [2.0, 2.0]])
+
+    # Outputs
+    wave = apply_output_stereo(input_wave, query)
+
+    assert numpy.array_equal(wave, true_wave)
+
+
 def test_calc_frame_per_phoneme():
     """Test `calc_frame_per_phoneme`."""
     # Inputs
@@ -325,7 +459,7 @@ def test_feat_to_framescale():
     assert true_frame_per_phoneme.shape[0] == len(phoneme_data_list), "Prerequisites"
 
     # Outputs
-    flatten_moras = pad_with_silence(flatten_moras, query)
+    flatten_moras = apply_prepost_silence(flatten_moras, query)
     frame_per_phoneme = calc_frame_per_phoneme(query, flatten_moras)
     f0 = calc_frame_pitch(query, flatten_moras, phoneme_data_list, frame_per_phoneme)
     frame_phoneme = calc_frame_phoneme(phoneme_data_list, frame_per_phoneme)
diff --git a/voicevox_engine/synthesis_engine/synthesis_engine.py b/voicevox_engine/synthesis_engine/synthesis_engine.py
index 9bd7dde56..9fa12d3a5 100644
--- a/voicevox_engine/synthesis_engine/synthesis_engine.py
+++ b/voicevox_engine/synthesis_engine/synthesis_engine.py
@@ -1,8 +1,10 @@
+import math
 import threading
 from itertools import chain
 from typing import List, Optional, Tuple
 
 import numpy
+from numpy import ndarray
 from soxr import resample
 
 from ..acoustic_feature_extractor import OjtPhoneme
@@ -112,8 +114,9 @@ def generate_silence_mora(length: float) -> Mora:
     return Mora(text="　", vowel="sil", vowel_length=length, pitch=0.0)
 
 
-def pad_with_silence(moras: list[Mora], query: AudioQuery) -> list[Mora]:
-    """モーラ列の先頭/最後尾へqueryに基づいた無音モーラを追加
+def apply_prepost_silence(moras: list[Mora], query: AudioQuery) -> list[Mora]:
+    """
+    前後無音（`prePhonemeLength` & `postPhonemeLength`）の適用
     Parameters
     ----------
     moras : List[Mora]
@@ -131,6 +134,27 @@ def pad_with_silence(moras: list[Mora], query: AudioQuery) -> list[Mora]:
     return moras
 
 
+def apply_speed_scale(moras: list[Mora], query: AudioQuery) -> list[Mora]:
+    """
+    話速スケール（`speedScale`）の適用
+    Parameters
+    ----------
+    moras : list[Mora]
+        モーラ系列
+    query : AudioQuery
+        音声合成クエリ
+    Returns
+    -------
+    moras : list[Mora]
+        話速スケールが適用されたモーラ系列
+    """
+    for mora in moras:
+        mora.vowel_length /= query.speedScale
+        if mora.consonant_length:
+            mora.consonant_length /= query.speedScale
+    return moras
+
+
 def calc_frame_per_phoneme(query: AudioQuery, moras: List[Mora]):
     """
     音素あたりのフレーム長を算出
@@ -145,6 +169,9 @@ def calc_frame_per_phoneme(query: AudioQuery, moras: List[Mora]):
     frame_per_phoneme : NDArray[]
         音素あたりのフレーム長。端数丸め。
     """
+    # Apply: グローバル特徴量による補正（話速）
+    moras = apply_speed_scale(moras, query)
+
     # 音素あたりの継続長
     sec_per_phoneme = numpy.array(
         [
@@ -157,10 +184,6 @@ def calc_frame_per_phoneme(query: AudioQuery, moras: List[Mora]):
         ],
         dtype=numpy.float32,
     )
-
-    # 話速による継続長の補正
-    sec_per_phoneme /= query.speedScale
-
     # 音素あたりのフレーム長。端数丸め。
     framerate = 24000 / 256  # framerate 93.75 [frame/sec]
     frame_per_phoneme = numpy.round(sec_per_phoneme * framerate).astype(numpy.int32)
@@ -168,6 +191,48 @@ def calc_frame_per_phoneme(query: AudioQuery, moras: List[Mora]):
     return frame_per_phoneme
 
 
+def apply_pitch_scale(moras: list[Mora], query: AudioQuery) -> list[Mora]:
+    """
+    音高スケール（`pitchScale`）の適用
+    Parameters
+    ----------
+    moras : list[Mora]
+        モーラ系列
+    query : AudioQuery
+        音声合成クエリ
+    Returns
+    -------
+    moras : list[Mora]
+        音高スケールが適用されたモーラ系列
+    """
+    for mora in moras:
+        mora.pitch *= 2**query.pitchScale
+    return moras
+
+
+def apply_intonation_scale(moras: list[Mora], query: AudioQuery) -> list[Mora]:
+    """
+    Apply the intonation scale (`intonationScale`).
+    Parameters
+    ----------
+    moras : list[Mora]
+        Mora sequence (pitches are updated in place)
+    query : AudioQuery
+        Audio synthesis query
+    Returns
+    -------
+    moras : list[Mora]
+        Mora sequence with the intonation scale applied
+    """
+    # Scale each voiced mora's (f0 > 0) deviation from the voiced mean
+    voiced = list(filter(lambda mora: mora.pitch > 0, moras))
+    mean_f0 = numpy.mean(list(map(lambda mora: mora.pitch, voiced))).item()
+    if not math.isnan(mean_f0):  # NaN never equals itself; empty list -> NaN
+        for mora in voiced:
+            mora.pitch = (mora.pitch - mean_f0) * query.intonationScale + mean_f0
+    return moras
+
+
 def calc_frame_pitch(
     query: AudioQuery,
     moras: List[Mora],
@@ -191,30 +256,41 @@ def calc_frame_pitch(
     frame_f0 : NDArray[]
         フレームごとの基本周波数系列
     """
+    moras = apply_pitch_scale(moras, query)
+    moras = apply_intonation_scale(moras, query)
+
     # TODO: Better function name (c.f. VOICEVOX/voicevox_engine#790)
     # モーラごとの基本周波数
     f0 = numpy.array([mora.pitch for mora in moras], dtype=numpy.float32)
 
-    # 音高スケールによる補正
-    f0 *= 2**query.pitchScale
-
-    # 抑揚スケールによる補正。有声音素 (f0>0) の平均値に対する乖離度をスケール
-    voiced = f0 > 0
-    mean_f0 = f0[voiced].mean()
-    if not numpy.isnan(mean_f0):
-        f0[voiced] = (f0[voiced] - mean_f0) * query.intonationScale + mean_f0
-
-    # フレームごとのピッチ化
+    # Rescale: 時間スケールの変更（モーラ -> フレーム）
     # 母音インデックスに基づき "音素あたりのフレーム長" を "モーラあたりのフレーム長" に集約
     vowel_indexes = numpy.array(split_mora(phonemes)[2])
     frame_per_mora = [
         a.sum() for a in numpy.split(frame_per_phoneme, vowel_indexes[:-1] + 1)
     ]
-    # モーラの基本周波数を子音・母音に割当てフレーム化
     frame_f0 = numpy.repeat(f0, frame_per_mora)
     return frame_f0
 
 
+def apply_volume_scale(wave: numpy.ndarray, query: AudioQuery) -> numpy.ndarray:
+    """
+    Apply the volume scale (`volumeScale`).
+    Parameters
+    ----------
+    wave : numpy.ndarray
+        Audio waveform (left unmodified)
+    query : AudioQuery
+        Audio synthesis query
+    Returns
+    -------
+    wave : numpy.ndarray
+        New waveform with the volume scale applied
+    """
+    wave = wave * query.volumeScale
+    return wave
+
+
 def calc_frame_phoneme(phonemes: List[OjtPhoneme], frame_per_phoneme: numpy.ndarray):
     """
     フレームごとの音素列の生成（onehot化 + フレーム化）
@@ -230,11 +306,59 @@ def calc_frame_phoneme(phonemes: List[OjtPhoneme], frame_per_phoneme: numpy.ndar
         フレームごとの音素系列
     """
     # TODO: Better function name (c.f. VOICEVOX/voicevox_engine#790)
+    # Convert: Core入力形式への変換（onehotベクトル系列）
     onehot_phoneme = numpy.stack([p.onehot for p in phonemes])
+
+    # Rescale: 時間スケールの変更（音素 -> フレーム）
     frame_phoneme = numpy.repeat(onehot_phoneme, frame_per_phoneme, axis=0)
     return frame_phoneme
 
 
+def apply_output_sampling_rate(
+    wave: ndarray, sr_wave: int, query: AudioQuery
+) -> ndarray:
+    """
+    出力サンプリングレート（`outputSamplingRate`）の適用
+    Parameters
+    ----------
+    wave : ndarray
+        音声波形
+    sr_wave : int
+        `wave`のサンプリングレート
+    query : AudioQuery
+        音声合成クエリ
+    Returns
+    -------
+    wave : ndarray
+        出力サンプリングレートが適用された音声波形
+    """
+    # サンプリングレート一致のときはスルー
+    if sr_wave == query.outputSamplingRate:
+        return wave
+
+    wave = resample(wave, sr_wave, query.outputSamplingRate)
+    return wave
+
+
+def apply_output_stereo(wave: ndarray, query: AudioQuery) -> ndarray:
+    """
+    ステレオ出力（`outputStereo`）の適用
+    Parameters
+    ----------
+    wave : ndarray
+        音声波形
+    query : AudioQuery
+        音声合成クエリ
+    Returns
+    -------
+    wave : ndarray
+        ステレオ出力設定が適用された音声波形
+    """
+    if query.outputStereo:
+        wave = numpy.array([wave, wave]).T
+    return wave
+
+
 class SynthesisEngine(SynthesisEngineBase):
     """音声合成器（core）の管理/実行/プロキシと音声合成フロー"""
 
@@ -493,7 +617,7 @@ def _synthesis_impl(self, query: AudioQuery, style_id: int):
         # AccentPhraseをすべてMoraおよびOjtPhonemeの形に分解し、処理可能な形にする
         flatten_moras, phoneme_data_list = pre_process(query.accent_phrases)
 
-        flatten_moras = pad_with_silence(flatten_moras, query)
+        flatten_moras = apply_prepost_silence(flatten_moras, query)
         frame_per_phoneme = calc_frame_per_phoneme(query, flatten_moras)
         f0 = calc_frame_pitch(
             query, flatten_moras, phoneme_data_list, frame_per_phoneme
@@ -509,21 +633,10 @@ def _synthesis_impl(self, query: AudioQuery, style_id: int):
                 phoneme=phoneme,
                 style_id=numpy.array(style_id, dtype=numpy.int64).reshape(-1),
             )
+            sr_wave = self.default_sampling_rate
 
-        # volume: ゲイン適用
-        wave *= query.volumeScale
-
-        # 出力サンプリングレートがデフォルト(decode forwarderによるもの、24kHz)でなければ、それを適用する
-        if query.outputSamplingRate != self.default_sampling_rate:
-            wave = resample(
-                wave,
-                self.default_sampling_rate,
-                query.outputSamplingRate,
-            )
-
-        # ステレオ変換
-        # 出力設定がステレオなのであれば、ステレオ化する
-        if query.outputStereo:
-            wave = numpy.array([wave, wave]).T
+        wave = apply_volume_scale(wave, query)
+        wave = apply_output_sampling_rate(wave, sr_wave, query)
+        wave = apply_output_stereo(wave, query)
 
         return wave

From d0a596d06a57f48839bd685953069dbac8f11ead Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sun, 10 Dec 2023 02:07:00 +0900
Subject: [PATCH 004/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`default=5Fsettin?=
 =?UTF-8?q?g.yml`=20=E6=8B=A1=E5=BC=B5=E5=AD=90=E5=A4=89=E6=9B=B4=20(#837)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Dockerfile                                  | 2 +-
 default_setting.yml => default_setting.yaml | 0
 run.spec                                    | 2 +-
 test/e2e/conftest.py                        | 2 +-
 voicevox_engine/setting/SettingLoader.py    | 2 +-
 5 files changed, 4 insertions(+), 4 deletions(-)
 rename default_setting.yml => default_setting.yaml (100%)

diff --git a/Dockerfile b/Dockerfile
index 5a5279235..ef920ecf4 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -227,7 +227,7 @@ COPY --from=download-onnxruntime-env /opt/onnxruntime /opt/onnxruntime
 # Add local files
 ADD ./voicevox_engine /opt/voicevox_engine/voicevox_engine
 ADD ./docs /opt/voicevox_engine/docs
-ADD ./run.py ./generate_licenses.py ./presets.yaml ./default.csv ./default_setting.yml ./engine_manifest.json /opt/voicevox_engine/
+ADD ./run.py ./generate_licenses.py ./presets.yaml ./default.csv ./default_setting.yaml ./engine_manifest.json /opt/voicevox_engine/
 ADD ./speaker_info /opt/voicevox_engine/speaker_info
 ADD ./ui_template /opt/voicevox_engine/ui_template
 ADD ./engine_manifest_assets /opt/voicevox_engine/engine_manifest_assets
diff --git a/default_setting.yml b/default_setting.yaml
similarity index 100%
rename from default_setting.yml
rename to default_setting.yaml
diff --git a/run.spec b/run.spec
index b84c82408..9f73c6b07 100644
--- a/run.spec
+++ b/run.spec
@@ -10,7 +10,7 @@ datas = [
     ('default.csv', '.'),
     ('licenses.json', '.'),
     ('presets.yaml', '.'),
-    ('default_setting.yml', '.'),
+    ('default_setting.yaml', '.'),
     ('ui_template', 'ui_template'),
 ]
 datas += collect_data_files('pyopenjtalk')
diff --git a/test/e2e/conftest.py b/test/e2e/conftest.py
index af21590c1..b656886b6 100644
--- a/test/e2e/conftest.py
+++ b/test/e2e/conftest.py
@@ -14,7 +14,7 @@
 def client():
     synthesis_engines = make_synthesis_engines(use_gpu=False)
     latest_core_version = get_latest_core_version(versions=synthesis_engines.keys())
-    setting_loader = SettingLoader(Path("./default_setting.yml"))
+    setting_loader = SettingLoader(Path("./default_setting.yaml"))
     preset_manager = PresetManager(  # FIXME: impl MockPresetManager
         preset_path=Path("./presets.yaml"),
     )
diff --git a/voicevox_engine/setting/SettingLoader.py b/voicevox_engine/setting/SettingLoader.py
index a78952e96..3f1669e26 100644
--- a/voicevox_engine/setting/SettingLoader.py
+++ b/voicevox_engine/setting/SettingLoader.py
@@ -5,7 +5,7 @@
 from ..utility import engine_root, get_save_dir
 from .Setting import Setting
 
-DEFAULT_SETTING_PATH: Path = engine_root() / "default_setting.yml"
+DEFAULT_SETTING_PATH: Path = engine_root() / "default_setting.yaml"
 USER_SETTING_PATH: Path = get_save_dir() / "setting.yml"
 
 

From 5d7562c51364e4672462f5110c4c87e1a8eaf145 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sun, 10 Dec 2023 06:44:35 +0900
Subject: [PATCH 005/177] =?UTF-8?q?Refactor:=20`frame=5Fper=5Fmora`=20?=
 =?UTF-8?q?=E3=81=AB=E3=82=88=E3=82=8B=E7=BD=AE=E3=81=8D=E6=8F=9B=E3=81=88?=
 =?UTF-8?q?=20(#841)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 test/test_synthesis_engine.py                 | 48 +++++++++++++------
 .../synthesis_engine/synthesis_engine.py      | 43 ++++++++++-------
 2 files changed, 60 insertions(+), 31 deletions(-)

diff --git a/test/test_synthesis_engine.py b/test/test_synthesis_engine.py
index 9ff7fb563..eee0cae66 100644
--- a/test/test_synthesis_engine.py
+++ b/test/test_synthesis_engine.py
@@ -20,6 +20,7 @@
     apply_prepost_silence,
     apply_speed_scale,
     apply_volume_scale,
+    calc_frame_per_mora,
     calc_frame_per_phoneme,
     calc_frame_phoneme,
     calc_frame_pitch,
@@ -353,24 +354,43 @@ def test_calc_frame_per_phoneme():
     assert numpy.array_equal(frame_per_phoneme, true_frame_per_phoneme)
 
 
+def test_calc_frame_per_mora():
+    """Test `calc_frame_per_mora`."""
+    # Inputs
+    moras = [
+        _gen_mora("　", None, None, "　", 2 * 0.01067, 0.0),  # 0.01067 [sec/frame]
+        _gen_mora("コ", "k", 2 * 0.01067, "o", 4 * 0.01067, 0.0),
+        _gen_mora("ン", None, None, "N", 4 * 0.01067, 0.0),
+        _gen_mora("、", None, None, "pau", 2 * 0.01067, 0.0),
+        _gen_mora("ヒ", "h", 2 * 0.01067, "i", 4 * 0.01067, 0.0),
+        _gen_mora("ホ", "h", 4 * 0.01067, "O", 2 * 0.01067, 0.0),
+        _gen_mora("　", None, None, "　", 6 * 0.01067, 0.0),
+    ]
+
+    # Expects
+    #                    Pre ko  N pau hi hO Pst
+    true_frame_per_mora = [2, 6, 4, 2, 6, 6, 6]
+    true_frame_per_mora = numpy.array(true_frame_per_mora, dtype=numpy.int32)
+
+    # Outputs
+    frame_per_phoneme = numpy.array(list(map(calc_frame_per_mora, moras)))
+
+    assert numpy.array_equal(frame_per_phoneme, true_frame_per_mora)
+
+
 def test_calc_frame_pitch():
     """Test `test_calc_frame_pitch`."""
     # Inputs
     query = _gen_query(pitchScale=2.0, intonationScale=0.5)
     moras = [
-        _gen_mora("　", None, None, "　", 0.0, 0.0),
-        _gen_mora("コ", "k", 0.0, "o", 0.0, 50.0),
-        _gen_mora("ン", None, None, "N", 0.0, 50.0),
-        _gen_mora("、", None, None, "pau", 0.0, 0.0),
-        _gen_mora("ヒ", "h", 0.0, "i", 0.0, 125.0),
-        _gen_mora("ホ", "h", 0.0, "O", 0.0, 0.0),
-        _gen_mora("　", None, None, "　", 0.0, 0.0),
+        _gen_mora("　", None, None, "　", 1 * 0.01067, 0.0),
+        _gen_mora("コ", "k", 1 * 0.01067, "o", 2 * 0.01067, 50.0),
+        _gen_mora("ン", None, None, "N", 2 * 0.01067, 50.0),
+        _gen_mora("、", None, None, "pau", 1 * 0.01067, 0.0),
+        _gen_mora("ヒ", "h", 1 * 0.01067, "i", 2 * 0.01067, 125.0),
+        _gen_mora("ホ", "h", 2 * 0.01067, "O", 1 * 0.01067, 0.0),
+        _gen_mora("　", None, None, "　", 3 * 0.01067, 0.0),
     ]
-    phoneme_str = "pau k o N pau h i h O pau"
-    phonemes = [OjtPhoneme(p) for p in phoneme_str.split()]
-    #                   Pre k  o  N pau h  i  h  O Pst
-    frame_per_phoneme = [1, 1, 2, 2, 1, 1, 2, 2, 1, 3]
-    frame_per_phoneme = numpy.array(frame_per_phoneme, dtype=numpy.int32)
 
     # Expects - x4 value scaled -> mean=300 var x0.5 intonation scaling
     #           pau   ko     ko     ko      N      N
@@ -382,7 +402,7 @@ def test_calc_frame_pitch():
     true_f0 = numpy.array(true1_f0 + true2_f0 + true3_f0, dtype=numpy.float32)
 
     # Outputs
-    f0 = calc_frame_pitch(query, moras, phonemes, frame_per_phoneme)
+    f0 = calc_frame_pitch(query, moras)
 
     assert numpy.array_equal(f0, true_f0)
 
@@ -461,7 +481,7 @@ def test_feat_to_framescale():
     # Outputs
     flatten_moras = apply_prepost_silence(flatten_moras, query)
     frame_per_phoneme = calc_frame_per_phoneme(query, flatten_moras)
-    f0 = calc_frame_pitch(query, flatten_moras, phoneme_data_list, frame_per_phoneme)
+    f0 = calc_frame_pitch(query, flatten_moras)
     frame_phoneme = calc_frame_phoneme(phoneme_data_list, frame_per_phoneme)
 
     assert numpy.array_equal(frame_phoneme, true_frame_phoneme)
diff --git a/voicevox_engine/synthesis_engine/synthesis_engine.py b/voicevox_engine/synthesis_engine/synthesis_engine.py
index 9fa12d3a5..d36d9a407 100644
--- a/voicevox_engine/synthesis_engine/synthesis_engine.py
+++ b/voicevox_engine/synthesis_engine/synthesis_engine.py
@@ -191,6 +191,29 @@ def calc_frame_per_phoneme(query: AudioQuery, moras: List[Mora]):
     return frame_per_phoneme
 
 
+def _to_frame(sec: float) -> ndarray:
+    FRAMERATE = 93.75  # 24000 / 256 [frame/sec]
+    return numpy.round(sec * FRAMERATE).astype(numpy.int32)
+
+
+def calc_frame_per_mora(mora: Mora) -> ndarray:
+    """
+    モーラあたりのフレーム長を算出
+    Parameters
+    ----------
+    mora : Mora
+        モーラ
+    Returns
+    -------
+    frame_per_mora : NDArray[]
+        モーラあたりのフレーム長。端数丸め。
+    """
+    # 音素ごとにフレーム長を算出し、和をモーラのフレーム長とする
+    vowel_frames = _to_frame(mora.vowel_length)
+    consonant_frames = _to_frame(mora.consonant_length) if mora.consonant else 0
+    return vowel_frames + consonant_frames
+
+
 def apply_pitch_scale(moras: list[Mora], query: AudioQuery) -> list[Mora]:
     """
     音高スケール（`pitchScale`）の適用
@@ -233,12 +256,7 @@ def apply_intonation_scale(moras: list[Mora], query: AudioQuery) -> list[Mora]:
     return moras
 
 
-def calc_frame_pitch(
-    query: AudioQuery,
-    moras: List[Mora],
-    phonemes: List[OjtPhoneme],
-    frame_per_phoneme: numpy.ndarray,
-):
+def calc_frame_pitch(query: AudioQuery, moras: list[Mora]) -> ndarray:
     """
     フレームごとのピッチの生成
     Parameters
@@ -247,10 +265,6 @@ def calc_frame_pitch(
         音声合成クエリ
     moras : List[Mora]
         モーラ列
-    phonemes : List[OjtPhoneme]
-        音素列
-    frame_per_phoneme: NDArray
-        音素あたりのフレーム長。端数丸め。
     Returns
     -------
     frame_f0 : NDArray[]
@@ -265,10 +279,7 @@ def calc_frame_pitch(
 
     # Rescale: 時間スケールの変更（モーラ -> フレーム）
     # 母音インデックスに基づき "音素あたりのフレーム長" を "モーラあたりのフレーム長" に集約
-    vowel_indexes = numpy.array(split_mora(phonemes)[2])
-    frame_per_mora = [
-        a.sum() for a in numpy.split(frame_per_phoneme, vowel_indexes[:-1] + 1)
-    ]
+    frame_per_mora = numpy.array(list(map(calc_frame_per_mora, moras)))
     frame_f0 = numpy.repeat(f0, frame_per_mora)
     return frame_f0
 
@@ -619,9 +630,7 @@ def _synthesis_impl(self, query: AudioQuery, style_id: int):
 
         flatten_moras = apply_prepost_silence(flatten_moras, query)
         frame_per_phoneme = calc_frame_per_phoneme(query, flatten_moras)
-        f0 = calc_frame_pitch(
-            query, flatten_moras, phoneme_data_list, frame_per_phoneme
-        )
+        f0 = calc_frame_pitch(query, flatten_moras)
         phoneme = calc_frame_phoneme(phoneme_data_list, frame_per_phoneme)
 
         # 今まで生成された情報をdecode_forwardにかけ、推論器によって音声波形を生成する

From cc10270cb3f5fed3340c883f92d8380f98a1bbc6 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Mon, 11 Dec 2023 16:36:42 +0900
Subject: [PATCH 006/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E3=83=95?=
 =?UTF-8?q?=E3=83=AC=E3=83=BC=E3=83=A0=E8=A8=88=E7=AE=97=E3=81=AE=20`=5Fto?=
 =?UTF-8?q?=5Fframe`=20=E3=81=AB=E3=82=88=E3=82=8B=E5=85=B1=E9=80=9A?=
 =?UTF-8?q?=E5=8C=96=20(#844)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Refactor: `_to_frame` による共通化
---
 .../synthesis_engine/synthesis_engine.py      | 22 +++++--------------
 1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/voicevox_engine/synthesis_engine/synthesis_engine.py b/voicevox_engine/synthesis_engine/synthesis_engine.py
index d36d9a407..410750383 100644
--- a/voicevox_engine/synthesis_engine/synthesis_engine.py
+++ b/voicevox_engine/synthesis_engine/synthesis_engine.py
@@ -172,22 +172,12 @@ def calc_frame_per_phoneme(query: AudioQuery, moras: List[Mora]):
     # Apply: グローバル特徴量による補正（話速）
     moras = apply_speed_scale(moras, query)
 
-    # 音素あたりの継続長
-    sec_per_phoneme = numpy.array(
-        [
-            length
-            for mora in moras
-            for length in (
-                [mora.consonant_length] if mora.consonant is not None else []
-            )
-            + [mora.vowel_length]
-        ],
-        dtype=numpy.float32,
-    )
-    # 音素あたりのフレーム長。端数丸め。
-    framerate = 24000 / 256  # framerate 93.75 [frame/sec]
-    frame_per_phoneme = numpy.round(sec_per_phoneme * framerate).astype(numpy.int32)
-
+    frame_per_phoneme: list[ndarray] = []
+    for mora in moras:
+        if mora.consonant:
+            frame_per_phoneme.append(_to_frame(mora.consonant_length))
+        frame_per_phoneme.append(_to_frame(mora.vowel_length))
+    frame_per_phoneme = numpy.array(frame_per_phoneme)
     return frame_per_phoneme
 
 

From 89a8b5342872b3d0c14a8f6e0af92903d2b199dc Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Tue, 12 Dec 2023 00:19:12 +0900
Subject: [PATCH 007/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E3=82=B3?=
 =?UTF-8?q?=E3=82=A2CDLL=E5=9E=8B=E4=BB=98=E3=81=91=E3=81=AE=E5=88=87?=
 =?UTF-8?q?=E3=82=8A=E5=87=BA=E3=81=97=20(#843)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Refactor: コアCDLL型付けの切り出し

* Fix: 参照渡しの戻し廃止
---
 .../synthesis_engine/core_wrapper.py          | 90 +++++++++++++------
 1 file changed, 61 insertions(+), 29 deletions(-)

diff --git a/voicevox_engine/synthesis_engine/core_wrapper.py b/voicevox_engine/synthesis_engine/core_wrapper.py
index 14afa427f..dcb2da101 100644
--- a/voicevox_engine/synthesis_engine/core_wrapper.py
+++ b/voicevox_engine/synthesis_engine/core_wrapper.py
@@ -378,6 +378,64 @@ def load_core(core_dir: Path, use_gpu: bool) -> CDLL:
         raise RuntimeError(f"このコンピュータのアーキテクチャ {platform.machine()} で利用可能なコアがありません")
 
 
+def _type_yukarin_s_forward(core_cdll: CDLL) -> None:
+    """
+    コアDLL `yukarin_s_forward` 関数の型付け
+    Parameters
+    ----------
+    core_cdll : CDLL
+        コアDLL
+    """
+    core_cdll.yukarin_s_forward.argtypes = (
+        c_int,
+        POINTER(c_long),
+        POINTER(c_long),
+        POINTER(c_float),
+    )
+    core_cdll.yukarin_s_forward.restype = c_bool
+
+
+def _type_yukarin_sa_forward(core_cdll: CDLL) -> None:
+    """
+    コアDLL `yukarin_sa_forward` 関数の型付け
+    Parameters
+    ----------
+    core_cdll : CDLL
+        コアDLL
+    """
+    core_cdll.yukarin_sa_forward.argtypes = (
+        c_int,
+        POINTER(c_long),
+        POINTER(c_long),
+        POINTER(c_long),
+        POINTER(c_long),
+        POINTER(c_long),
+        POINTER(c_long),
+        POINTER(c_long),
+        POINTER(c_float),
+    )
+    core_cdll.yukarin_sa_forward.restype = c_bool
+
+
+def _type_decode_forward(core_cdll: CDLL) -> None:
+    """
+    コアDLL `decode_forward` 関数の型付け
+    Parameters
+    ----------
+    core_cdll : CDLL
+        コアDLL
+    """
+    core_cdll.decode_forward.argtypes = (
+        c_int,
+        c_int,
+        POINTER(c_float),
+        POINTER(c_float),
+        POINTER(c_long),
+        POINTER(c_float),
+    )
+    core_cdll.decode_forward.restype = c_bool
+
+
 class CoreWrapper:
     def __init__(
         self,
@@ -393,9 +451,9 @@ def __init__(
 
         self.core.initialize.restype = c_bool
         self.core.metas.restype = c_char_p
-        self.core.yukarin_s_forward.restype = c_bool
-        self.core.yukarin_sa_forward.restype = c_bool
-        self.core.decode_forward.restype = c_bool
+        _type_yukarin_s_forward(self.core)
+        _type_yukarin_sa_forward(self.core)
+        _type_decode_forward(self.core)
         self.core.last_error_message.restype = c_char_p
 
         self.exist_supported_devices = False
@@ -426,32 +484,6 @@ def __init__(
             self.exist_finalize = True
             exist_cpu_num_threads = True
 
-        self.core.yukarin_s_forward.argtypes = (
-            c_int,
-            POINTER(c_long),
-            POINTER(c_long),
-            POINTER(c_float),
-        )
-        self.core.yukarin_sa_forward.argtypes = (
-            c_int,
-            POINTER(c_long),
-            POINTER(c_long),
-            POINTER(c_long),
-            POINTER(c_long),
-            POINTER(c_long),
-            POINTER(c_long),
-            POINTER(c_long),
-            POINTER(c_float),
-        )
-        self.core.decode_forward.argtypes = (
-            c_int,
-            c_int,
-            POINTER(c_float),
-            POINTER(c_float),
-            POINTER(c_long),
-            POINTER(c_float),
-        )
-
         cwd = os.getcwd()
         os.chdir(core_dir)
         try:

From 3c971cc2e634d9e20df22ab638d7319f523747aa Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Tue, 12 Dec 2023 00:23:00 +0900
Subject: [PATCH 008/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`default=5Fsettin?=
 =?UTF-8?q?g.yaml`=20=E3=81=AE=E5=BB=83=E6=AD=A2=20(#855)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Refactor: `default_setting.yaml` の廃止
---
 Dockerfile                               |  2 +-
 default_setting.yaml                     |  2 --
 run.spec                                 |  1 -
 test/e2e/conftest.py                     |  2 +-
 voicevox_engine/setting/SettingLoader.py | 15 ++++++++++++---
 5 files changed, 14 insertions(+), 8 deletions(-)
 delete mode 100644 default_setting.yaml

diff --git a/Dockerfile b/Dockerfile
index ef920ecf4..545449a7c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -227,7 +227,7 @@ COPY --from=download-onnxruntime-env /opt/onnxruntime /opt/onnxruntime
 # Add local files
 ADD ./voicevox_engine /opt/voicevox_engine/voicevox_engine
 ADD ./docs /opt/voicevox_engine/docs
-ADD ./run.py ./generate_licenses.py ./presets.yaml ./default.csv ./default_setting.yaml ./engine_manifest.json /opt/voicevox_engine/
+ADD ./run.py ./generate_licenses.py ./presets.yaml ./default.csv ./engine_manifest.json /opt/voicevox_engine/
 ADD ./speaker_info /opt/voicevox_engine/speaker_info
 ADD ./ui_template /opt/voicevox_engine/ui_template
 ADD ./engine_manifest_assets /opt/voicevox_engine/engine_manifest_assets
diff --git a/default_setting.yaml b/default_setting.yaml
deleted file mode 100644
index 3421e7a6a..000000000
--- a/default_setting.yaml
+++ /dev/null
@@ -1,2 +0,0 @@
-allow_origin: null
-cors_policy_mode: localapps
diff --git a/run.spec b/run.spec
index 9f73c6b07..970f2adfa 100644
--- a/run.spec
+++ b/run.spec
@@ -10,7 +10,6 @@ datas = [
     ('default.csv', '.'),
     ('licenses.json', '.'),
     ('presets.yaml', '.'),
-    ('default_setting.yaml', '.'),
     ('ui_template', 'ui_template'),
 ]
 datas += collect_data_files('pyopenjtalk')
diff --git a/test/e2e/conftest.py b/test/e2e/conftest.py
index b656886b6..b6eab18ae 100644
--- a/test/e2e/conftest.py
+++ b/test/e2e/conftest.py
@@ -14,7 +14,7 @@
 def client():
     synthesis_engines = make_synthesis_engines(use_gpu=False)
     latest_core_version = get_latest_core_version(versions=synthesis_engines.keys())
-    setting_loader = SettingLoader(Path("./default_setting.yaml"))
+    setting_loader = SettingLoader(Path("./not_exist.yaml"))
     preset_manager = PresetManager(  # FIXME: impl MockPresetManager
         preset_path=Path("./presets.yaml"),
     )
diff --git a/voicevox_engine/setting/SettingLoader.py b/voicevox_engine/setting/SettingLoader.py
index 3f1669e26..453b7a94d 100644
--- a/voicevox_engine/setting/SettingLoader.py
+++ b/voicevox_engine/setting/SettingLoader.py
@@ -2,21 +2,30 @@
 
 import yaml
 
-from ..utility import engine_root, get_save_dir
+from ..utility import get_save_dir
 from .Setting import Setting
 
-DEFAULT_SETTING_PATH: Path = engine_root() / "default_setting.yaml"
 USER_SETTING_PATH: Path = get_save_dir() / "setting.yml"
 
 
 class SettingLoader:
     def __init__(self, setting_file_path: Path) -> None:
+        """
+        設定ファイルの管理
+        Parameters
+        ----------
+        setting_file_path : Path
+            設定ファイルのパス。存在しない場合はデフォルト値を設定。
+        """
         self.setting_file_path = setting_file_path
 
     def load_setting_file(self) -> Setting:
+        # 設定値の読み込み
         if not self.setting_file_path.is_file():
-            setting = yaml.safe_load(DEFAULT_SETTING_PATH.read_text(encoding="utf-8"))
+            # 設定ファイルが存在しないためデフォルト値を取得
+            setting = {"allow_origin": None, "cors_policy_mode": "localapps"}
         else:
+            # 指定された設定ファイルから値を取得
             setting = yaml.safe_load(self.setting_file_path.read_text(encoding="utf-8"))
 
         setting = Setting(

From 5f0b4a7d4cc1bb8ecc6df234de1b6c21ed847ce9 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 13 Dec 2023 00:50:52 +0900
Subject: [PATCH 009/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`OjtPhoneme`=20?=
 =?UTF-8?q?=E3=82=AF=E3=83=A9=E3=82=B9=E5=A4=89=E6=95=B0=E3=81=AE=E3=83=97?=
 =?UTF-8?q?=E3=83=A9=E3=82=A4=E3=83=99=E3=83=BC=E3=83=88=E5=8C=96=20(#846)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 test/test_acoustic_feature_extractor.py       | 20 ++---
 test/test_synthesis_engine.py                 |  2 +-
 voicevox_engine/acoustic_feature_extractor.py | 82 +++++--------------
 3 files changed, 29 insertions(+), 75 deletions(-)

diff --git a/test/test_acoustic_feature_extractor.py b/test/test_acoustic_feature_extractor.py
index 94ef7ac63..24c70d284 100644
--- a/test/test_acoustic_feature_extractor.py
+++ b/test/test_acoustic_feature_extractor.py
@@ -2,6 +2,8 @@
 
 from voicevox_engine.acoustic_feature_extractor import OjtPhoneme
 
+TRUE_NUM_PHONEME = 45
+
 
 class TestOjtPhoneme(TestCase):
     def setUp(self):
@@ -10,17 +12,13 @@ def setUp(self):
         hello_hiho = "sil k o N n i ch i w a pau h i h o d e s U sil".split()
         self.ojt_hello_hiho = [OjtPhoneme(s) for s in hello_hiho]
 
-    def test_phoneme_list(self):
-        self.assertEqual(OjtPhoneme.phoneme_list[1], "A")
-        self.assertEqual(OjtPhoneme.phoneme_list[14], "e")
-        self.assertEqual(OjtPhoneme.phoneme_list[26], "m")
-        self.assertEqual(OjtPhoneme.phoneme_list[38], "ts")
-        self.assertEqual(OjtPhoneme.phoneme_list[41], "v")
-
     def test_const(self):
-        TRUE_NUM_PHONEME = 45
-        self.assertEqual(OjtPhoneme.num_phoneme, TRUE_NUM_PHONEME)
-        self.assertEqual(OjtPhoneme.space_phoneme, "pau")
+        self.assertEqual(OjtPhoneme._NUM_PHONEME, TRUE_NUM_PHONEME)
+        self.assertEqual(OjtPhoneme._PHONEME_LIST[1], "A")
+        self.assertEqual(OjtPhoneme._PHONEME_LIST[14], "e")
+        self.assertEqual(OjtPhoneme._PHONEME_LIST[26], "m")
+        self.assertEqual(OjtPhoneme._PHONEME_LIST[38], "ts")
+        self.assertEqual(OjtPhoneme._PHONEME_LIST[41], "v")
 
     def test_convert(self):
         sil_phoneme = OjtPhoneme("sil")
@@ -56,7 +54,7 @@ def test_onehot(self):
             0,
         ]
         for i, phoneme in enumerate(self.ojt_hello_hiho):
-            for j in range(OjtPhoneme.num_phoneme):
+            for j in range(TRUE_NUM_PHONEME):
                 if phoneme_id_list[i] == j:
                     self.assertEqual(phoneme.onehot[j], 1.0)
                 else:
diff --git a/test/test_synthesis_engine.py b/test/test_synthesis_engine.py
index eee0cae66..bdb6de486 100644
--- a/test/test_synthesis_engine.py
+++ b/test/test_synthesis_engine.py
@@ -928,7 +928,7 @@ def synthesis_test_base(self, audio_query: AudioQuery):
         )
 
         # Expects: Apply/Convert/Rescale
-        num_phoneme = OjtPhoneme.num_phoneme
+        num_phoneme = 45
         # mora_phoneme_listのPhoneme ID版
         mora_phoneme_id_list = [OjtPhoneme(p).phoneme_id for p in mora_phoneme_list]
 
diff --git a/voicevox_engine/acoustic_feature_extractor.py b/voicevox_engine/acoustic_feature_extractor.py
index 516727056..066bfbdab 100644
--- a/voicevox_engine/acoustic_feature_extractor.py
+++ b/voicevox_engine/acoustic_feature_extractor.py
@@ -1,74 +1,30 @@
 import numpy
 
+# 音素のリスト
+_P_LIST1 = ("pau", "A", "E", "I", "N", "O", "U", "a", "b", "by")
+_P_LIST2 = ("ch", "cl", "d", "dy", "e", "f", "g", "gw", "gy", "h")
+_P_LIST3 = ("hy", "i", "j", "k", "kw", "ky", "m", "my", "n", "ny")
+_P_LIST4 = ("o", "p", "py", "r", "ry", "s", "sh", "t", "ts", "ty")
+_P_LIST5 = ("u", "v", "w", "y", "z")
+_PHONEME_LIST = _P_LIST1 + _P_LIST2 + _P_LIST3 + _P_LIST4 + _P_LIST5
+
+# 音素リストの要素数
+_NUM_PHONEME = len(_PHONEME_LIST)
+
 
 class OjtPhoneme:
     """
-    OpenJTalkに含まれる音素群クラス
-
-    Attributes
-    ----------
-    phoneme_list : Sequence[str]
-        音素のリスト
-    num_phoneme : int
-        音素リストの要素数
-    space_phoneme : str
-        読点に値する音素
+    OpenJTalkに含まれる音素
     """
 
-    phoneme_list = (
-        "pau",
-        "A",
-        "E",
-        "I",
-        "N",
-        "O",
-        "U",
-        "a",
-        "b",
-        "by",
-        "ch",
-        "cl",
-        "d",
-        "dy",
-        "e",
-        "f",
-        "g",
-        "gw",
-        "gy",
-        "h",
-        "hy",
-        "i",
-        "j",
-        "k",
-        "kw",
-        "ky",
-        "m",
-        "my",
-        "n",
-        "ny",
-        "o",
-        "p",
-        "py",
-        "r",
-        "ry",
-        "s",
-        "sh",
-        "t",
-        "ts",
-        "ty",
-        "u",
-        "v",
-        "w",
-        "y",
-        "z",
-    )
-    num_phoneme = len(phoneme_list)
-    space_phoneme = "pau"
+    _PHONEME_LIST = _PHONEME_LIST
+    _NUM_PHONEME = _NUM_PHONEME
 
     def __init__(self, phoneme: str):
-        # `sil`-to-`pau` (silent to space_phoneme) conversion
+        # 無音をポーズに変換
         if "sil" in phoneme:
-            phoneme = self.space_phoneme
+            phoneme = "pau"
+
         self.phoneme = phoneme
 
     def __eq__(self, o: object):
@@ -84,7 +40,7 @@ def phoneme_id(self):
         id : int
             phoneme_idを返す
         """
-        return self.phoneme_list.index(self.phoneme)
+        return self._PHONEME_LIST.index(self.phoneme)
 
     @property
     def onehot(self):
@@ -95,6 +51,6 @@ def onehot(self):
         onehot : numpy.ndarray
             音素onehotベクトル（listの長さ分の0埋め配列のうち、phoneme id番目が1.0の配列）
         """
-        array = numpy.zeros(self.num_phoneme, dtype=numpy.float32)
+        array = numpy.zeros(self._NUM_PHONEME, dtype=numpy.float32)
         array[self.phoneme_id] = 1.0
         return array

From 3e6a7395f9c9a3d800b111ca653fbba47d77a4a6 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 13 Dec 2023 02:55:00 +0900
Subject: [PATCH 010/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E8=B2=A2?=
 =?UTF-8?q?=E7=8C=AE=E8=80=85=E3=82=AC=E3=82=A4=E3=83=89=E7=AF=80=E3=81=B8?=
 =?UTF-8?q?=E3=81=AE=E9=9B=86=E7=B4=84=20(#838)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md | 136 +++++++++++++++++++++++++++++-------------------------
 1 file changed, 72 insertions(+), 64 deletions(-)

diff --git a/README.md b/README.md
index bb2488199..e7aef765c 100644
--- a/README.md
+++ b/README.md
@@ -350,26 +350,16 @@ docker run --rm --gpus all -p '127.0.0.1:50021:50021' voicevox/voicevox_engine:n
 
 GPU 版を利用する場合、環境によってエラーが発生することがあります。その場合、`--runtime=nvidia`を`docker run`につけて実行すると解決できることがあります。
 
-## 貢献者の方へ
+## 実行環境構築
 
-Issue を解決するプルリクエストを作成される際は、別の方と同じ Issue に取り組むことを避けるため、
-Issue 側で取り組み始めたことを伝えるか、最初に Draft プルリクエストを作成してください。
-
-[VOICEVOX 非公式 Discord サーバー](https://discord.gg/WMwWetrzuh)にて、開発の議論や雑談を行っています。気軽にご参加ください。
-
-## 環境構築
-
-`Python 3.11.3` を用いて開発されています。
-インストールするには、各 OS ごとの C/C++ コンパイラ、CMake が必要になります。
+`Python 3.11.3` が動作確認済みの環境です。  
+この環境へ必要なライブラリをインストールしてください。  
 
 ```bash
-# 開発に必要なライブラリのインストール
-python -m pip install -r requirements-dev.txt -r requirements-test.txt
-
-# とりあえず実行したいだけなら代わりにこちら
 python -m pip install -r requirements.txt
 ```
 
+
 ## 実行
 
 コマンドライン引数の詳細は以下のコマンドで確認してください。
@@ -458,51 +448,43 @@ Mac では、`--runtime_dir`引数の代わりに`DYLD_LIBRARY_PATH`の指定が
 DYLD_LIBRARY_PATH="/path/to/onnx" python run.py --voicelib_dir="/path/to/voicevox_core"
 ```
 
-## コードフォーマット
+## API ドキュメントの確認
 
-このソフトウェアでは、リモートにプッシュする前にコードフォーマットを確認する仕組み(静的解析ツール)を利用できます。
-利用するには、開発に必要なライブラリのインストールに加えて、以下のコマンドを実行してください。
-プルリクエストを作成する際は、利用することを推奨します。
+[API ドキュメント](https://voicevox.github.io/voicevox_engine/api/)（実体は`docs/api/index.html`）は自動で更新されます。  
+次のコマンドで API ドキュメントを手動で作成することができます。
 
 ```bash
-pre-commit install -t pre-push
+python make_docs.py
 ```
 
-エラーが出た際は、以下のコマンドで修正することが可能です。なお、完全に修正できるわけではないので注意してください。
-
-```bash
-pysen run format lint
-```
+## ユーザー辞書の更新について
 
-## テスト
+以下のコマンドで openjtalk のユーザー辞書をコンパイルできます。
 
 ```bash
-python -m pytest
+python -c "import pyopenjtalk; pyopenjtalk.create_user_dict('default.csv','user.dic')"
 ```
 
-## タイポチェック
+## 貢献者ガイド
 
-[typos](https://github.com/crate-ci/typos) を使ってタイポのチェックを行っています。
-[typos をインストール](https://github.com/crate-ci/typos#install) した後
+### 貢献者の方へ
 
-```bash
-typos
-```
+Issue を解決するプルリクエストを作成される際は、別の方と同じ Issue に取り組むことを避けるため、
+Issue 側で取り組み始めたことを伝えるか、最初に Draft プルリクエストを作成してください。
 
-でタイポチェックを行えます。
-もし誤判定やチェックから除外すべきファイルがあれば
-[設定ファイルの説明](https://github.com/crate-ci/typos#false-positives) に従って`_typos.toml`を編集してください。
+[VOICEVOX 非公式 Discord サーバー](https://discord.gg/WMwWetrzuh)にて、開発の議論や雑談を行っています。気軽にご参加ください。
 
-## API ドキュメントの確認
+### 開発環境構築
 
-[API ドキュメント](https://voicevox.github.io/voicevox_engine/api/)（実体は`docs/api/index.html`）は自動で更新されます。  
-次のコマンドで API ドキュメントを手動で作成することができます。
+`Python 3.11.3` を用いて開発されています。
+インストールするには、各 OS ごとの C/C++ コンパイラ、CMake が必要になります。
 
 ```bash
-python make_docs.py
+# ライブラリのインストール
+python -m pip install -r requirements-dev.txt -r requirements-test.txt
 ```
 
-## ビルド
+### ビルド
 
 この方法でビルドしたものは、リリースで公開されているものとは異なります。
 また、GPU で利用するには cuDNN や CUDA、DirectML などのライブラリが追加で必要となります。
@@ -523,9 +505,44 @@ LIBONNXRUNTIME_PATH="/path/to/libonnxruntime" \
 pyinstaller --noconfirm run.spec
 ```
 
-## 依存関係
+### コードフォーマット
 
-### 更新
+このソフトウェアでは、リモートにプッシュする前にコードフォーマットを確認する仕組み(静的解析ツール)を利用できます。
+利用するには、開発に必要なライブラリのインストールに加えて、以下のコマンドを実行してください。
+プルリクエストを作成する際は、利用することを推奨します。
+
+```bash
+pre-commit install -t pre-push
+```
+
+エラーが出た際は、以下のコマンドで修正することが可能です。なお、完全に修正できるわけではないので注意してください。
+
+```bash
+pysen run format lint
+```
+
+### テスト
+
+```bash
+python -m pytest
+```
+
+### タイポチェック
+
+[typos](https://github.com/crate-ci/typos) を使ってタイポのチェックを行っています。
+[typos をインストール](https://github.com/crate-ci/typos#install) した後
+
+```bash
+typos
+```
+
+でタイポチェックを行えます。
+もし誤判定やチェックから除外すべきファイルがあれば
+[設定ファイルの説明](https://github.com/crate-ci/typos#false-positives) に従って`_typos.toml`を編集してください。
+
+### 依存関係
+
+#### 更新
 
 [Poetry](https://python-poetry.org/) を用いて依存ライブラリのバージョンを固定しています。
 以下のコマンドで操作できます:
@@ -546,8 +563,7 @@ poetry export --without-hashes --with dev -o requirements-dev.txt
 poetry export --without-hashes --with test -o requirements-test.txt
 poetry export --without-hashes --with license -o requirements-license.txt
 ```
-
-### ライセンス
+#### ライセンス
 
 依存ライブラリは「コアビルド時にリンクして一体化しても、コア部のコード非公開 OK」なライセンスを持つ必要があります。  
 主要ライセンスの可否は以下の通りです。
@@ -556,13 +572,19 @@ poetry export --without-hashes --with license -o requirements-license.txt
 - LGPL: OK （コアと動的分離されているため）
 - GPL: NG （全関連コードの公開が必要なため）
 
-## ユーザー辞書の更新について
+### GitHub Actions
 
-以下のコマンドで openjtalk のユーザー辞書をコンパイルできます。
+#### Variables
 
-```bash
-python -c "import pyopenjtalk; pyopenjtalk.create_user_dict('default.csv','user.dic')"
-```
+| name               | description         |
+| :----------------- | :------------------ |
+| DOCKERHUB_USERNAME | Docker Hub ユーザ名 |
+
+#### Secrets
+
+| name            | description                                                             |
+| :-------------- | :---------------------------------------------------------------------- |
+| DOCKERHUB_TOKEN | [Docker Hub アクセストークン](https://hub.docker.com/settings/security) |
 
 ## マルチエンジン機能に関して
 
@@ -581,7 +603,7 @@ VOICEVOX API に準拠した複数のエンジンの Web API をポートを分
 ### マルチエンジン機能への対応方法
 
 VOICEVOX API 準拠エンジンを起動する実行バイナリを作ることで対応が可能です。
-VOICEVOX ENGINE リポジトリを fork し、一部の機能を改造するのが簡単です。
+VOICEVOX ENGINE リポジトリを fork し、一部の機能を改造するのが簡単です（#貢献者ガイド を参照ください）。
 
 改造すべき点はエンジン情報・キャラクター情報・音声合成の３点です。
 
@@ -611,20 +633,6 @@ VOICEVOX エディターにうまく読み込ませられないときは、エ
 
 </details>
 
-## GitHub Actions
-
-### Variables
-
-| name               | description         |
-| :----------------- | :------------------ |
-| DOCKERHUB_USERNAME | Docker Hub ユーザ名 |
-
-### Secrets
-
-| name            | description                                                             |
-| :-------------- | :---------------------------------------------------------------------- |
-| DOCKERHUB_TOKEN | [Docker Hub アクセストークン](https://hub.docker.com/settings/security) |
-
 ## 事例紹介
 
 **[voicevox-client](https://github.com/tuna2134/voicevox-client) [@tuna2134](https://github.com/tuna2134)** ･･･ VOICEVOX ENGINE のための Python ラッパー

From a81905da44b48c9ed00e60d7b80f4b6b5770fb4a Mon Sep 17 00:00:00 2001
From: takana-v <44311840+takana-v@users.noreply.github.com>
Date: Wed, 13 Dec 2023 03:31:44 +0900
Subject: [PATCH 011/177] =?UTF-8?q?issue-labeler=E3=81=AE=E3=83=90?=
 =?UTF-8?q?=E3=83=BC=E3=82=B8=E3=83=A7=E3=83=B3=E3=82=92=E4=BF=AE=E6=AD=A3?=
 =?UTF-8?q?=20(#831)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/labeler.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml
index 06d547dcf..f485a56e1 100644
--- a/.github/workflows/labeler.yml
+++ b/.github/workflows/labeler.yml
@@ -10,7 +10,7 @@ jobs:
   triage:
     runs-on: ubuntu-latest
     steps:
-    - uses: github/issue-labeler@v3
+    - uses: github/issue-labeler@v3.3
       with:
         repo-token: "${{ secrets.GITHUB_TOKEN }}"
         configuration-path: .github/labeler.yml

From 3e8b32fa1d9404c93d4fc1eb7d193cafd9ac418f Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 13 Dec 2023 03:53:52 +0900
Subject: [PATCH 012/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`MetaStore.py`=20?=
 =?UTF-8?q?docstring=E3=81=AE=E8=BF=BD=E5=8A=A0=20(#845)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 voicevox_engine/metas/MetasStore.py | 55 +++++++++++++++++++++++++----
 1 file changed, 48 insertions(+), 7 deletions(-)

diff --git a/voicevox_engine/metas/MetasStore.py b/voicevox_engine/metas/MetasStore.py
index 88a7bc37d..c8367e831 100644
--- a/voicevox_engine/metas/MetasStore.py
+++ b/voicevox_engine/metas/MetasStore.py
@@ -16,7 +16,14 @@ class MetasStore:
     """
 
     def __init__(self, engine_speakers_path: Path) -> None:
+        """
+        Parameters
+        ----------
+        engine_speakers_path : Path
+            エンジンに含まれる話者メタ情報ディレクトリのパス。
+        """
         self._engine_speakers_path = engine_speakers_path
+        # エンジンに含まれる各話者のメタ情報
         self._loaded_metas: Dict[str, EngineSpeaker] = {
             folder.name: EngineSpeaker(
                 **json.loads((folder / "metas.json").read_text(encoding="utf-8"))
@@ -25,14 +32,32 @@ def __init__(self, engine_speakers_path: Path) -> None:
         }
 
     def speaker_engine_metas(self, speaker_uuid: str) -> EngineSpeaker:
+        """
+        エンジンに含まれる指定話者のメタ情報を取得
+        Parameters
+        ----------
+        speaker_uuid : str
+            話者UUID
+        Returns
+        -------
+        ret : EngineSpeaker
+            エンジンに含まれる指定話者のメタ情報
+        """
         return self.loaded_metas[speaker_uuid]
 
     def combine_metas(self, core_metas: List[CoreSpeaker]) -> List[Speaker]:
         """
-        与えられたmetaにエンジンのコア情報を付加して返す
-        core_metas: コアのmetas()が返すJSONのModel
+        コアに含まれる話者メタ情報に、エンジンに含まれる話者メタ情報を統合して返す
+        Parameters
+        ----------
+        core_metas : List[CoreSpeaker]
+            コアに含まれる話者メタ情報
+        Returns
+        -------
+        ret : List[Speaker]
+            エンジンとコアに含まれる話者メタ情報
         """
-
+        # 話者単位でエンジン・コアに含まれるメタ情報を統合
         return [
             Speaker(
                 **self.speaker_engine_metas(speaker_meta.speaker_uuid).dict(),
@@ -45,10 +70,19 @@ def combine_metas(self, core_metas: List[CoreSpeaker]) -> List[Speaker]:
     # SynthesisEngineBaseによる循環importを修正する
     def load_combined_metas(self, engine: "SynthesisEngineBase") -> List[Speaker]:
         """
-        与えられたエンジンから、コア・エンジン両方の情報を含んだMetasを返す
+        コアに含まれる話者メタ情報とエンジンに含まれる話者メタ情報を統合
+        Parameters
+        ----------
+        engine : SynthesisEngineBase
+            コアに含まれる話者メタ情報をもったエンジン
+        Returns
+        -------
+        ret : List[Speaker]
+            エンジンとコアに含まれる話者メタ情報
         """
-
+        # コアに含まれる話者メタ情報の収集
         core_metas = [CoreSpeaker(**speaker) for speaker in json.loads(engine.speakers)]
+        # エンジンに含まれる話者メタ情報との統合
         return self.combine_metas(core_metas)
 
     @property
@@ -62,9 +96,16 @@ def loaded_metas(self) -> Dict[str, EngineSpeaker]:
 
 def construct_lookup(speakers: List[Speaker]) -> Dict[int, Tuple[Speaker, StyleInfo]]:
     """
-    `{style.id: StyleInfo}`の変換テーブル
+    スタイルID に話者メタ情報・スタイルメタ情報を紐付ける対応表を生成
+    Parameters
+    ----------
+    speakers : List[Speaker]
+        話者メタ情報
+    Returns
+    -------
+    ret : Dict[int, Tuple[Speaker, StyleInfo]]
+        スタイルID に話者メタ情報・スタイルメタ情報が紐付いた対応表
     """
-
     lookup_table = dict()
     for speaker in speakers:
         for style in speaker.styles:

From 290333348b93f00b654f1bd062c117b536b74b42 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 13 Dec 2023 04:03:01 +0900
Subject: [PATCH 013/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`LibraryManager`?=
 =?UTF-8?q?=20docstring=E3=83=BB=E5=9E=8B=E3=81=AE=E8=BF=BD=E5=8A=A0=20(#8?=
 =?UTF-8?q?33)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 voicevox_engine/library_manager.py | 60 +++++++++++++++++++++++++++---
 1 file changed, 55 insertions(+), 5 deletions(-)

diff --git a/voicevox_engine/library_manager.py b/voicevox_engine/library_manager.py
index 9168a0a4b..3bab1508b 100644
--- a/voicevox_engine/library_manager.py
+++ b/voicevox_engine/library_manager.py
@@ -23,6 +23,8 @@
 
 
 class LibraryManager:
+    """音声ライブラリ (`.vvlib`) の管理"""
+
     def __init__(
         self,
         library_root_dir: Path,
@@ -42,7 +44,13 @@ def __init__(
         self.engine_name = engine_name
         self.engine_uuid = engine_uuid
 
-    def downloadable_libraries(self):
+    def downloadable_libraries(self) -> list[DownloadableLibraryInfo]:
+        """
+        ダウンロード可能ライブラリの一覧を取得
+        Returns
+        -------
+        - : list[DownloadableLibraryInfo]
+        """
         # == ダウンロード情報をネットワーク上から取得する場合
         # url = "https://example.com/downloadable_libraries.json"
         # response = requests.get(url)
@@ -83,9 +91,17 @@ def downloadable_libraries(self):
             return list(map(DownloadableLibraryInfo.parse_obj, libraries))
 
     def installed_libraries(self) -> Dict[str, InstalledLibraryInfo]:
-        library = {}
+        """
+        インストール済み音声ライブラリの情報を取得
+        Returns
+        -------
+        library : Dict[str, InstalledLibraryInfo]
+            インストール済みライブラリの情報
+        """
+        library: Dict[str, InstalledLibraryInfo] = {}
         for library_dir in self.library_root_dir.iterdir():
             if library_dir.is_dir():
+                # ライブラリ情報の取得 from `library_root_dir / f"{library_uuid}" / "metas.json"`
                 library_uuid = os.path.basename(library_dir)
                 with open(library_dir / INFO_FILE, encoding="utf-8") as f:
                     library[library_uuid] = json.load(f)
@@ -93,7 +109,20 @@ def installed_libraries(self) -> Dict[str, InstalledLibraryInfo]:
                     library[library_uuid]["uninstallable"] = True
         return library
 
-    def install_library(self, library_id: str, file: BytesIO):
+    def install_library(self, library_id: str, file: BytesIO) -> Path:
+        """
+        音声ライブラリ (`.vvlib`) のインストール
+        Parameters
+        ----------
+        library_id : str
+            インストール対象ライブラリID
+        file : BytesIO
+            ライブラリファイルBlob
+        Returns
+        -------
+        library_dir : Path
+            インストール済みライブラリのディレクトリパス
+        """
         for downloadable_library in self.downloadable_libraries():
             if downloadable_library.uuid == library_id:
                 library_info = downloadable_library.dict()
@@ -102,10 +131,16 @@ def install_library(self, library_id: str, file: BytesIO):
             raise HTTPException(
                 status_code=404, detail=f"指定された音声ライブラリ {library_id} が見つかりません。"
             )
+
+        # ライブラリディレクトリの生成
         library_dir = self.library_root_dir / library_id
         library_dir.mkdir(exist_ok=True)
+
+        # metas.jsonの生成
         with open(library_dir / INFO_FILE, "w", encoding="utf-8") as f:
             json.dump(library_info, f, indent=4, ensure_ascii=False)
+
+        # zipファイル形式のバリデーション
         if not zipfile.is_zipfile(file):
             raise HTTPException(
                 status_code=422, detail=f"音声ライブラリ {library_id} は不正なファイルです。"
@@ -117,7 +152,7 @@ def install_library(self, library_id: str, file: BytesIO):
                     status_code=422, detail=f"音声ライブラリ {library_id} は不正なファイルです。"
                 )
 
-            # validate manifest version
+            # マニフェストファイルの存在とファイル形式をバリデーション
             vvlib_manifest = None
             try:
                 vvlib_manifest = json.loads(
@@ -134,6 +169,7 @@ def install_library(self, library_id: str, file: BytesIO):
                     detail=f"指定された音声ライブラリ {library_id} のvvlib_manifest.jsonは不正です。",
                 )
 
+            # マニフェスト形式のバリデーション
             try:
                 VvlibManifest.validate(vvlib_manifest)
             except ValidationError:
@@ -142,11 +178,13 @@ def install_library(self, library_id: str, file: BytesIO):
                     detail=f"指定された音声ライブラリ {library_id} のvvlib_manifest.jsonに不正なデータが含まれています。",
                 )
 
+            # ライブラリバージョンのバリデーション
             if not Version.is_valid(vvlib_manifest["version"]):
                 raise HTTPException(
                     status_code=422, detail=f"指定された音声ライブラリ {library_id} のversionが不正です。"
                 )
 
+            # マニフェストバージョンのバリデーション
             try:
                 vvlib_manifest_version = Version.parse(
                     vvlib_manifest["manifest_version"]
@@ -156,33 +194,45 @@ def install_library(self, library_id: str, file: BytesIO):
                     status_code=422,
                     detail=f"指定された音声ライブラリ {library_id} のmanifest_versionが不正です。",
                 )
-
             if vvlib_manifest_version > self.supported_vvlib_version:
                 raise HTTPException(
                     status_code=422, detail=f"指定された音声ライブラリ {library_id} は未対応です。"
                 )
 
+            # ライブラリ-エンジン対応のバリデーション
             if vvlib_manifest["engine_uuid"] != self.engine_uuid:
                 raise HTTPException(
                     status_code=422,
                     detail=f"指定された音声ライブラリ {library_id} は{self.engine_name}向けではありません。",
                 )
 
+            # 展開によるインストール
             zf.extractall(library_dir)
+
         return library_dir
 
     def uninstall_library(self, library_id: str):
+        """
+        インストール済み音声ライブラリのアンインストール
+        Parameters
+        ----------
+        library_id : str
+            アンインストール対象ライブラリID
+        """
+        # 対象ライブラリがインストール済みであることの確認
         installed_libraries = self.installed_libraries()
         if library_id not in installed_libraries.keys():
             raise HTTPException(
                 status_code=404, detail=f"指定された音声ライブラリ {library_id} はインストールされていません。"
             )
 
+        # アンインストール許可フラグのバリデーション
         if not installed_libraries[library_id]["uninstallable"]:
             raise HTTPException(
                 status_code=403, detail=f"指定された音声ライブラリ {library_id} はアンインストールできません。"
             )
 
+        # ディレクトリ削除によるアンインストール
         try:
             shutil.rmtree(self.library_root_dir / library_id)
         except Exception:

From 56e3971c0a5b1339e863e53bcf0b716f0b687bbd Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 13 Dec 2023 04:18:29 +0900
Subject: [PATCH 014/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`round`=20?=
 =?UTF-8?q?=E6=8C=99=E5=8B=95=E3=83=A1=E3=83=A2=E3=81=AE=E8=BF=BD=E5=8A=A0?=
 =?UTF-8?q?=20(#852)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 voicevox_engine/synthesis_engine/synthesis_engine.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/voicevox_engine/synthesis_engine/synthesis_engine.py b/voicevox_engine/synthesis_engine/synthesis_engine.py
index 410750383..da74a1d78 100644
--- a/voicevox_engine/synthesis_engine/synthesis_engine.py
+++ b/voicevox_engine/synthesis_engine/synthesis_engine.py
@@ -183,6 +183,7 @@ def calc_frame_per_phoneme(query: AudioQuery, moras: List[Mora]):
 
 def _to_frame(sec: float) -> ndarray:
     FRAMERATE = 93.75  # 24000 / 256 [frame/sec]
+    # NOTE: `round` は偶数丸め。移植時に取扱い注意。詳細は voicevox_engine#552
     return numpy.round(sec * FRAMERATE).astype(numpy.int32)
 
 

From e90a2f7e925c7a3c7c8c0dea9aa93a9177e121b2 Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Wed, 13 Dec 2023 12:55:41 +0900
Subject: [PATCH 015/177] =?UTF-8?q?README=E3=82=92=E3=83=A6=E3=83=BC?=
 =?UTF-8?q?=E3=82=B6=E3=83=BC=E3=82=AC=E3=82=A4=E3=83=89=E3=83=BB=E9=96=8B?=
 =?UTF-8?q?=E7=99=BA=E8=80=85=EF=BC=8B=E8=B2=A2=E7=8C=AE=E8=80=85=E3=82=AC?=
 =?UTF-8?q?=E3=82=A4=E3=83=89=E3=81=AB=E5=8C=BA=E5=88=86=E5=8C=96=20(#858)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md | 164 ++++++++++++++++++++++++++----------------------------
 1 file changed, 80 insertions(+), 84 deletions(-)

diff --git a/README.md b/README.md
index e7aef765c..5f4594717 100644
--- a/README.md
+++ b/README.md
@@ -17,18 +17,38 @@
 コアは [VOICEVOX CORE](https://github.com/VOICEVOX/voicevox_core/) 、
 全体構成は [こちら](https://github.com/VOICEVOX/voicevox/blob/main/docs/%E5%85%A8%E4%BD%93%E6%A7%8B%E6%88%90.md) に詳細があります。）
 
-## ダウンロード
+## ユーザーガイド
+
+### ダウンロード
 
 [こちら](https://github.com/VOICEVOX/voicevox_engine/releases/latest)から対応するエンジンをダウンロードしてください。
 
-## API ドキュメント
+### API ドキュメント
 
 [API ドキュメント](https://voicevox.github.io/voicevox_engine/api/)をご参照ください。
 
 VOICEVOX エンジンもしくはエディタを起動した状態で http://127.0.0.1:50021/docs にアクセスすると、起動中のエンジンのドキュメントも確認できます。  
 今後の方針などについては [VOICEVOX 音声合成エンジンとの連携](./docs/VOICEVOX音声合成エンジンとの連携.md) も参考になるかもしれません。
 
-リクエスト・レスポンスの文字コードはすべて UTF-8 です。
+### Docker イメージ
+
+#### CPU
+
+```bash
+docker pull voicevox/voicevox_engine:cpu-ubuntu20.04-latest
+docker run --rm -p '127.0.0.1:50021:50021' voicevox/voicevox_engine:cpu-ubuntu20.04-latest
+```
+
+#### GPU
+
+```bash
+docker pull voicevox/voicevox_engine:nvidia-ubuntu20.04-latest
+docker run --rm --gpus all -p '127.0.0.1:50021:50021' voicevox/voicevox_engine:nvidia-ubuntu20.04-latest
+```
+
+##### トラブルシューティング
+
+GPU 版を利用する場合、環境によってエラーが発生することがあります。その場合、`--runtime=nvidia`を`docker run`につけて実行すると解決できることがあります。
 
 ### HTTP リクエストで音声合成するサンプルコード
 
@@ -53,7 +73,7 @@ curl -s \
 
 `style_id` に指定する値は `/speakers` エンドポイントで得られます。
 
-### 読み方を AquesTalk風記法で取得・修正するサンプルコード
+### 読み方を AquesTalk 風記法で取得・修正するサンプルコード
 
 `/audio_query`のレスポンスにはエンジンが判断した読み方が AquesTalk 風記法([本家の記法](https://www.a-quest.com/archive/manual/siyo_onseikigou.pdf)とは一部異なります)で記録されています。
 記法は次のルールに従います。
@@ -283,6 +303,10 @@ VOICEVOX ではセキュリティ保護のため`localhost`・`127.0.0.1`・`app
 3. 保存ボタンを押して、変更を確定してください。
 4. 設定の適用にはエンジンの再起動が必要です。必要に応じて再起動をしてください。
 
+### 文字コード
+
+リクエスト・レスポンスの文字コードはすべて UTF-8 です。
+
 ### その他の引数
 
 エンジン起動時に引数を指定できます。詳しいことは`-h`引数でヘルプを確認してください。
@@ -326,41 +350,33 @@ options:
                         プリセットファイルを指定できます。指定がない場合、環境変数 VV_PRESET_FILE、--voicevox_dirのpresets.yaml、実行ファイルのディレクトリのpresets.yamlを順に探します。
 ```
 
-## アップデート
+### アップデート
 
 エンジンディレクトリ内にあるファイルを全て消去し、新しいものに置き換えてください。
 
-## Docker イメージ
-
-### CPU
+## 開発者・貢献者向けガイド
 
-```bash
-docker pull voicevox/voicevox_engine:cpu-ubuntu20.04-latest
-docker run --rm -p '127.0.0.1:50021:50021' voicevox/voicevox_engine:cpu-ubuntu20.04-latest
-```
-
-### GPU
-
-```bash
-docker pull voicevox/voicevox_engine:nvidia-ubuntu20.04-latest
-docker run --rm --gpus all -p '127.0.0.1:50021:50021' voicevox/voicevox_engine:nvidia-ubuntu20.04-latest
-```
+### 貢献者の方へ
 
-#### トラブルシューティング
+Issue を解決するプルリクエストを作成される際は、別の方と同じ Issue に取り組むことを避けるため、
+Issue 側で取り組み始めたことを伝えるか、最初に Draft プルリクエストを作成してください。
 
-GPU 版を利用する場合、環境によってエラーが発生することがあります。その場合、`--runtime=nvidia`を`docker run`につけて実行すると解決できることがあります。
+[VOICEVOX 非公式 Discord サーバー](https://discord.gg/WMwWetrzuh)にて、開発の議論や雑談を行っています。気軽にご参加ください。
 
-## 実行環境構築
+### 環境構築
 
-`Python 3.11.3` が動作確認済みの環境です。  
-この環境へ必要なライブラリをインストールしてください。  
+`Python 3.11.3` を用いて開発されています。
+インストールするには、各 OS ごとの C/C++ コンパイラ、CMake が必要になります。
 
 ```bash
+# 実行環境のインストール
 python -m pip install -r requirements.txt
-```
 
+# 開発環境・テスト環境のインストール
+python -m pip install -r requirements-dev.txt -r requirements-test.txt
+```
 
-## 実行
+### 実行
 
 コマンドライン引数の詳細は以下のコマンドで確認してください。
 
@@ -393,30 +409,28 @@ python run.py --output_log_utf8
 # もしくは VV_OUTPUT_LOG_UTF8=1 python run.py
 ```
 
-### CPU スレッド数を指定する
+#### CPU スレッド数を指定する
 
 CPU スレッド数が未指定の場合は、論理コア数の半分か物理コア数が使われます。（殆どの CPU で、これは全体の処理能力の半分です）  
 もし IaaS 上で実行していたり、専用サーバーで実行している場合など、  
 エンジンが使う処理能力を調節したい場合は、CPU スレッド数を指定することで実現できます。
 
 - 実行時引数で指定する
-
   ```bash
   python run.py --voicevox_dir=$VOICEVOX_DIR --cpu_num_threads=4
   ```
-
 - 環境変数で指定する
   ```bash
   export VV_CPU_NUM_THREADS=4
   python run.py --voicevox_dir=$VOICEVOX_DIR
   ```
 
-### 過去のバージョンのコアを使う
+#### 過去のバージョンのコアを使う
 
 VOICEVOX Core 0.5.4 以降のコアを使用する事が可能です。  
 Mac での libtorch 版コアのサポートはしていません。
 
-#### 過去のバイナリを指定する
+##### 過去のバイナリを指定する
 
 製品版 VOICEVOX もしくはコンパイル済みエンジンのディレクトリを`--voicevox_dir`引数で指定すると、そのバージョンのコアが使用されます。
 
@@ -430,7 +444,7 @@ Mac では、`DYLD_LIBRARY_PATH`の指定が必要です。
 DYLD_LIBRARY_PATH="/path/to/voicevox" python run.py --voicevox_dir="/path/to/voicevox"
 ```
 
-#### 音声ライブラリを直接指定する
+##### 音声ライブラリを直接指定する
 
 [VOICEVOX Core の zip ファイル](https://github.com/VOICEVOX/voicevox_core/releases)を解凍したディレクトリを`--voicelib_dir`引数で指定します。  
 また、コアのバージョンに合わせて、[libtorch](https://pytorch.org/)や[onnxruntime](https://github.com/microsoft/onnxruntime)のディレクトリを`--runtime_dir`引数で指定します。  
@@ -448,42 +462,6 @@ Mac では、`--runtime_dir`引数の代わりに`DYLD_LIBRARY_PATH`の指定が
 DYLD_LIBRARY_PATH="/path/to/onnx" python run.py --voicelib_dir="/path/to/voicevox_core"
 ```
 
-## API ドキュメントの確認
-
-[API ドキュメント](https://voicevox.github.io/voicevox_engine/api/)（実体は`docs/api/index.html`）は自動で更新されます。  
-次のコマンドで API ドキュメントを手動で作成することができます。
-
-```bash
-python make_docs.py
-```
-
-## ユーザー辞書の更新について
-
-以下のコマンドで openjtalk のユーザー辞書をコンパイルできます。
-
-```bash
-python -c "import pyopenjtalk; pyopenjtalk.create_user_dict('default.csv','user.dic')"
-```
-
-## 貢献者ガイド
-
-### 貢献者の方へ
-
-Issue を解決するプルリクエストを作成される際は、別の方と同じ Issue に取り組むことを避けるため、
-Issue 側で取り組み始めたことを伝えるか、最初に Draft プルリクエストを作成してください。
-
-[VOICEVOX 非公式 Discord サーバー](https://discord.gg/WMwWetrzuh)にて、開発の議論や雑談を行っています。気軽にご参加ください。
-
-### 開発環境構築
-
-`Python 3.11.3` を用いて開発されています。
-インストールするには、各 OS ごとの C/C++ コンパイラ、CMake が必要になります。
-
-```bash
-# ライブラリのインストール
-python -m pip install -r requirements-dev.txt -r requirements-test.txt
-```
-
 ### ビルド
 
 この方法でビルドしたものは、リリースで公開されているものとは異なります。
@@ -563,6 +541,7 @@ poetry export --without-hashes --with dev -o requirements-dev.txt
 poetry export --without-hashes --with test -o requirements-test.txt
 poetry export --without-hashes --with license -o requirements-license.txt
 ```
+
 #### ライセンス
 
 依存ライブラリは「コアビルド時にリンクして一体化しても、コア部のコード非公開 OK」なライセンスを持つ必要があります。  
@@ -572,21 +551,15 @@ poetry export --without-hashes --with license -o requirements-license.txt
 - LGPL: OK （コアと動的分離されているため）
 - GPL: NG （全関連コードの公開が必要なため）
 
-### GitHub Actions
-
-#### Variables
+### ユーザー辞書の更新について
 
-| name               | description         |
-| :----------------- | :------------------ |
-| DOCKERHUB_USERNAME | Docker Hub ユーザ名 |
-
-#### Secrets
+以下のコマンドで openjtalk のユーザー辞書をコンパイルできます。
 
-| name            | description                                                             |
-| :-------------- | :---------------------------------------------------------------------- |
-| DOCKERHUB_TOKEN | [Docker Hub アクセストークン](https://hub.docker.com/settings/security) |
+```bash
+python -c "import pyopenjtalk; pyopenjtalk.create_user_dict('default.csv','user.dic')"
+```
 
-## マルチエンジン機能に関して
+### マルチエンジン機能に関して
 
 VOICEVOX エディターでは、複数のエンジンを同時に起動することができます。
 この機能を利用することで、自作の音声合成エンジンや既存の音声合成エンジンを VOICEVOX エディター上で動かすことが可能です。
@@ -595,15 +568,15 @@ VOICEVOX エディターでは、複数のエンジンを同時に起動する
 
 <details>
 
-### マルチエンジン機能の仕組み
+#### マルチエンジン機能の仕組み
 
 VOICEVOX API に準拠した複数のエンジンの Web API をポートを分けて起動し、統一的に扱うことでマルチエンジン機能を実現しています。
 エディターがそれぞれのエンジンを実行バイナリ経由で起動し、EngineID と結びつけて設定や状態を個別管理します。
 
-### マルチエンジン機能への対応方法
+#### マルチエンジン機能への対応方法
 
 VOICEVOX API 準拠エンジンを起動する実行バイナリを作ることで対応が可能です。
-VOICEVOX ENGINE リポジトリを fork し、一部の機能を改造するのが簡単です（#貢献者ガイド を参照ください）。
+VOICEVOX ENGINE リポジトリを fork し、一部の機能を改造するのが簡単です。
 
 改造すべき点はエンジン情報・キャラクター情報・音声合成の３点です。
 
@@ -619,7 +592,7 @@ VOICEVOX ENGINE リポジトリを fork し、一部の機能を改造するの
 VOICEVOX API での音声合成は、エンジン側で音声合成クエリ`AudioQuery`の初期値を作成してユーザーに返し、ユーザーが必要に応じてクエリを編集したあと、エンジンがクエリに従って音声合成することで実現しています。
 クエリ作成は`/audio_query`エンドポイントで、音声合成は`/synthesis`エンドポイントで行っており、最低この２つに対応すれば VOICEVOX API に準拠したことになります。
 
-### マルチエンジン機能対応エンジンの配布方法
+#### マルチエンジン機能対応エンジンの配布方法
 
 VVPP ファイルとして配布するのがおすすめです。
 VVPP は「VOICEVOX プラグインパッケージ」の略で、中身はビルドしたエンジンなどを含んだディレクトリの Zip ファイルです。
@@ -633,6 +606,29 @@ VOICEVOX エディターにうまく読み込ませられないときは、エ
 
 </details>
 
+### API ドキュメントの確認
+
+[API ドキュメント](https://voicevox.github.io/voicevox_engine/api/)（実体は`docs/api/index.html`）は自動で更新されます。  
+次のコマンドで API ドキュメントを手動で作成することができます。
+
+```bash
+python make_docs.py
+```
+
+### GitHub Actions
+
+#### Variables
+
+| name               | description         |
+| :----------------- | :------------------ |
+| DOCKERHUB_USERNAME | Docker Hub ユーザ名 |
+
+#### Secrets
+
+| name            | description                                                             |
+| :-------------- | :---------------------------------------------------------------------- |
+| DOCKERHUB_TOKEN | [Docker Hub アクセストークン](https://hub.docker.com/settings/security) |
+
 ## 事例紹介
 
 **[voicevox-client](https://github.com/tuna2134/voicevox-client) [@tuna2134](https://github.com/tuna2134)** ･･･ VOICEVOX ENGINE のための Python ラッパー

From 6f5c384d90555981759db0b2ef66fb699659ee74 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 13 Dec 2023 12:57:27 +0900
Subject: [PATCH 016/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E3=83=A6?=
 =?UTF-8?q?=E3=83=BC=E3=82=B6=E3=83=BC=E8=BE=9E=E6=9B=B8=E3=81=AEdocstring?=
 =?UTF-8?q?=E3=83=BB=E5=A4=89=E6=95=B0=E5=90=8D=E3=83=BB=E5=9E=8B=E3=83=BB?=
 =?UTF-8?q?=E3=82=B3=E3=83=A1=E3=83=B3=E3=83=88=20(#836)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 test/test_user_dict.py       |   6 +-
 voicevox_engine/user_dict.py | 205 ++++++++++++++++++++++++++++++-----
 2 files changed, 181 insertions(+), 30 deletions(-)

diff --git a/test/test_user_dict.py b/test/test_user_dict.py
index 4280bbe53..6014ac721 100644
--- a/test/test_user_dict.py
+++ b/test/test_user_dict.py
@@ -11,8 +11,8 @@
 from voicevox_engine.model import UserDictWord, WordTypes
 from voicevox_engine.part_of_speech_data import MAX_PRIORITY, part_of_speech_data
 from voicevox_engine.user_dict import (
+    _create_word,
     apply_word,
-    create_word,
     delete_word,
     import_user_dict,
     read_dict,
@@ -90,7 +90,7 @@ def test_read_not_exist_json(self):
     def test_create_word(self):
         # 将来的に品詞などが追加された時にテストを増やす
         self.assertEqual(
-            create_word(surface="test", pronunciation="テスト", accent_type=1),
+            _create_word(surface="test", pronunciation="テスト", accent_type=1),
             UserDictWord(
                 surface="ｔｅｓｔ",
                 priority=5,
@@ -219,7 +219,7 @@ def test_priority(self):
         for pos in part_of_speech_data:
             for i in range(MAX_PRIORITY + 1):
                 self.assertEqual(
-                    create_word(
+                    _create_word(
                         surface="test",
                         pronunciation="テスト",
                         accent_type=1,
diff --git a/voicevox_engine/user_dict.py b/voicevox_engine/user_dict.py
index 7db07a721..f720ac4aa 100644
--- a/voicevox_engine/user_dict.py
+++ b/voicevox_engine/user_dict.py
@@ -21,27 +21,39 @@
 if not save_dir.is_dir():
     save_dir.mkdir(parents=True)
 
-default_dict_path = root_dir / "default.csv"
-user_dict_path = save_dir / "user_dict.json"
-compiled_dict_path = save_dir / "user.dic"
+default_dict_path = root_dir / "default.csv"  # VOICEVOXデフォルト辞書ファイルのパス
+user_dict_path = save_dir / "user_dict.json"  # ユーザー辞書ファイルのパス
+compiled_dict_path = save_dir / "user.dic"  # コンパイル済み辞書ファイルのパス
 
 
+# 同時書き込みの制御
 mutex_user_dict = threading.Lock()
 mutex_openjtalk_dict = threading.Lock()
 
 
 @mutex_wrapper(mutex_user_dict)
-def write_to_json(user_dict: Dict[str, UserDictWord], user_dict_path: Path):
+def _write_to_json(user_dict: Dict[str, UserDictWord], user_dict_path: Path) -> None:
+    """
+    ユーザー辞書ファイルへのユーザー辞書データ書き込み
+    Parameters
+    ----------
+    user_dict : Dict[str, UserDictWord]
+        ユーザー辞書データ
+    user_dict_path : Path
+        ユーザー辞書ファイルのパス
+    """
     converted_user_dict = {}
     for word_uuid, word in user_dict.items():
         word_dict = word.dict()
-        word_dict["cost"] = priority2cost(
+        word_dict["cost"] = _priority2cost(
             word_dict["context_id"], word_dict["priority"]
         )
         del word_dict["priority"]
         converted_user_dict[word_uuid] = word_dict
     # 予めjsonに変換できることを確かめる
     user_dict_json = json.dumps(converted_user_dict, ensure_ascii=False)
+
+    # ユーザー辞書ファイルへの書き込み
     user_dict_path.write_text(user_dict_json, encoding="utf-8")
 
 
@@ -50,14 +62,29 @@ def update_dict(
     default_dict_path: Path = default_dict_path,
     user_dict_path: Path = user_dict_path,
     compiled_dict_path: Path = compiled_dict_path,
-):
+) -> None:
+    """
+    辞書の更新
+    Parameters
+    ----------
+    default_dict_path : Path
+        デフォルト辞書ファイルのパス
+    user_dict_path : Path
+        ユーザー辞書ファイルのパス
+    compiled_dict_path : Path
+        コンパイル済み辞書ファイルのパス
+    """
     random_string = uuid4()
-    tmp_csv_path = save_dir / f".tmp.dict_csv-{random_string}"
-    tmp_compiled_path = save_dir / f".tmp.dict_compiled-{random_string}"
+    tmp_csv_path = save_dir / f".tmp.dict_csv-{random_string}"  # csv形式辞書データの一時保存ファイル
+    tmp_compiled_path = (
+        save_dir / f".tmp.dict_compiled-{random_string}"
+    )  # コンパイル済み辞書データの一時保存ファイル
 
     try:
         # 辞書.csvを作成
         csv_text = ""
+
+        # デフォルト辞書データの追加
         if not default_dict_path.is_file():
             print("Warning: Cannot find default dictionary.", file=sys.stderr)
             return
@@ -65,6 +92,8 @@ def update_dict(
         if default_dict == default_dict.rstrip():
             default_dict += "\n"
         csv_text += default_dict
+
+        # ユーザー辞書データの追加
         user_dict = read_dict(user_dict_path=user_dict_path)
         for word_uuid in user_dict:
             word = user_dict[word_uuid]
@@ -77,7 +106,7 @@ def update_dict(
             ).format(
                 surface=word.surface,
                 context_id=word.context_id,
-                cost=priority2cost(word.context_id, word.priority),
+                cost=_priority2cost(word.context_id, word.priority),
                 part_of_speech=word.part_of_speech,
                 part_of_speech_detail_1=word.part_of_speech_detail_1,
                 part_of_speech_detail_2=word.part_of_speech_detail_2,
@@ -91,6 +120,7 @@ def update_dict(
                 mora_count=word.mora_count,
                 accent_associative_rule=word.accent_associative_rule,
             )
+        # 辞書データを辞書.csv へ一時保存
         tmp_csv_path.write_text(csv_text, encoding="utf-8")
 
         # 辞書.csvをOpenJTalk用にコンパイル
@@ -119,10 +149,23 @@ def update_dict(
 
 @mutex_wrapper(mutex_user_dict)
 def read_dict(user_dict_path: Path = user_dict_path) -> Dict[str, UserDictWord]:
+    """
+    ユーザー辞書の読み出し
+    Parameters
+    ----------
+    user_dict_path : Path
+        ユーザー辞書ファイルのパス
+    Returns
+    -------
+    result : Dict[str, UserDictWord]
+        ユーザー辞書
+    """
+    # 指定ユーザー辞書が存在しない場合、空辞書を返す
     if not user_dict_path.is_file():
         return {}
+
     with user_dict_path.open(encoding="utf-8") as f:
-        result = {}
+        result: Dict[str, UserDictWord] = {}
         for word_uuid, word in json.load(f).items():
             # cost2priorityで変換を行う際にcontext_idが必要となるが、
             # 0.12以前の辞書は、context_idがハードコーディングされていたためにユーザー辞書内に保管されていない
@@ -131,20 +174,39 @@ def read_dict(user_dict_path: Path = user_dict_path) -> Dict[str, UserDictWord]:
                 word["context_id"] = part_of_speech_data[
                     WordTypes.PROPER_NOUN
                 ].context_id
-            word["priority"] = cost2priority(word["context_id"], word["cost"])
+            word["priority"] = _cost2priority(word["context_id"], word["cost"])
             del word["cost"]
             result[str(UUID(word_uuid))] = UserDictWord(**word)
 
     return result
 
 
-def create_word(
+def _create_word(
     surface: str,
     pronunciation: str,
     accent_type: int,
     word_type: Optional[WordTypes] = None,
     priority: Optional[int] = None,
 ) -> UserDictWord:
+    """
+    単語オブジェクトの生成
+    Parameters
+    ----------
+    surface : str
+        単語の表層形
+    pronunciation : str
+        単語の発音
+    accent_type : int
+        単語のアクセント型
+    word_type : Optional[WordTypes]
+        品詞
+    priority : Optional[int]
+        優先度
+    Returns
+    -------
+    UserDictWord
+        単語オブジェクト
+    """
     if word_type is None:
         word_type = WordTypes.PROPER_NOUN
     if word_type not in part_of_speech_data.keys():
@@ -181,7 +243,31 @@ def apply_word(
     user_dict_path: Path = user_dict_path,
     compiled_dict_path: Path = compiled_dict_path,
 ) -> str:
-    word = create_word(
+    """
+    新規単語の追加
+    Parameters
+    ----------
+    surface : str
+        単語の表層形
+    pronunciation : str
+        単語の発音
+    accent_type : int
+        単語のアクセント型
+    word_type : Optional[WordTypes]
+        品詞
+    priority : Optional[int]
+        優先度
+    user_dict_path : Path
+        ユーザー辞書ファイルのパス
+    compiled_dict_path : Path
+        コンパイル済み辞書ファイルのパス
+    Returns
+    -------
+    word_uuid : str
+        追加された単語に発行されたUUID
+    """
+    # 新規単語の追加による辞書データの更新
+    word = _create_word(
         surface=surface,
         pronunciation=pronunciation,
         accent_type=accent_type,
@@ -191,8 +277,11 @@ def apply_word(
     user_dict = read_dict(user_dict_path=user_dict_path)
     word_uuid = str(uuid4())
     user_dict[word_uuid] = word
-    write_to_json(user_dict, user_dict_path)
+
+    # 更新された辞書データの保存と適用
+    _write_to_json(user_dict, user_dict_path)
     update_dict(user_dict_path=user_dict_path, compiled_dict_path=compiled_dict_path)
+
     return word_uuid
 
 
@@ -205,19 +294,44 @@ def rewrite_word(
     priority: Optional[int] = None,
     user_dict_path: Path = user_dict_path,
     compiled_dict_path: Path = compiled_dict_path,
-):
-    word = create_word(
+) -> None:
+    """
+    既存単語の上書き更新
+    Parameters
+    ----------
+    word_uuid : str
+        単語UUID
+    surface : str
+        単語の表層形
+    pronunciation : str
+        単語の発音
+    accent_type : int
+        単語のアクセント型
+    word_type : Optional[WordTypes]
+        品詞
+    priority : Optional[int]
+        優先度
+    user_dict_path : Path
+        ユーザー辞書ファイルのパス
+    compiled_dict_path : Path
+        コンパイル済み辞書ファイルのパス
+    """
+    word = _create_word(
         surface=surface,
         pronunciation=pronunciation,
         accent_type=accent_type,
         word_type=word_type,
         priority=priority,
     )
+
+    # 既存単語の上書きによる辞書データの更新
     user_dict = read_dict(user_dict_path=user_dict_path)
     if word_uuid not in user_dict:
         raise HTTPException(status_code=422, detail="UUIDに該当するワードが見つかりませんでした")
     user_dict[word_uuid] = word
-    write_to_json(user_dict, user_dict_path)
+
+    # 更新された辞書データの保存と適用
+    _write_to_json(user_dict, user_dict_path)
     update_dict(user_dict_path=user_dict_path, compiled_dict_path=compiled_dict_path)
 
 
@@ -225,12 +339,26 @@ def delete_word(
     word_uuid: str,
     user_dict_path: Path = user_dict_path,
     compiled_dict_path: Path = compiled_dict_path,
-):
+) -> None:
+    """
+    単語の削除
+    Parameters
+    ----------
+    word_uuid : str
+        単語UUID
+    user_dict_path : Path
+        ユーザー辞書ファイルのパス
+    compiled_dict_path : Path
+        コンパイル済み辞書ファイルのパス
+    """
+    # 既存単語の削除による辞書データの更新
     user_dict = read_dict(user_dict_path=user_dict_path)
     if word_uuid not in user_dict:
         raise HTTPException(status_code=422, detail="IDに該当するワードが見つかりませんでした")
     del user_dict[word_uuid]
-    write_to_json(user_dict, user_dict_path)
+
+    # 更新された辞書データの保存と適用
+    _write_to_json(user_dict, user_dict_path)
     update_dict(user_dict_path=user_dict_path, compiled_dict_path=compiled_dict_path)
 
 
@@ -240,8 +368,23 @@ def import_user_dict(
     user_dict_path: Path = user_dict_path,
     default_dict_path: Path = default_dict_path,
     compiled_dict_path: Path = compiled_dict_path,
-):
-    # 念のため型チェックを行う
+) -> None:
+    """
+    ユーザー辞書のインポート
+    Parameters
+    ----------
+    dict_data : Dict[str, UserDictWord]
+        インポートするユーザー辞書のデータ
+    override : bool
+        重複したエントリがあった場合、上書きするかどうか
+    user_dict_path : Path
+        ユーザー辞書ファイルのパス
+    default_dict_path : Path
+        デフォルト辞書ファイルのパス
+    compiled_dict_path : Path
+        コンパイル済み辞書ファイルのパス
+    """
+    # インポートする辞書データのバリデーション
     for word_uuid, word in dict_data.items():
         UUID(word_uuid)
         assert isinstance(word, UserDictWord)
@@ -263,12 +406,20 @@ def import_user_dict(
                 break
         else:
             raise ValueError("対応していない品詞です")
+
+    # 既存辞書の読み出し
     old_dict = read_dict(user_dict_path=user_dict_path)
+
+    # 辞書データの更新
+    # 重複エントリの上書き
     if override:
         new_dict = {**old_dict, **dict_data}
+    # 重複エントリの保持
     else:
         new_dict = {**dict_data, **old_dict}
-    write_to_json(user_dict=new_dict, user_dict_path=user_dict_path)
+
+    # 更新された辞書データの保存と適用
+    _write_to_json(user_dict=new_dict, user_dict_path=user_dict_path)
     update_dict(
         default_dict_path=default_dict_path,
         user_dict_path=user_dict_path,
@@ -276,23 +427,23 @@ def import_user_dict(
     )
 
 
-def search_cost_candidates(context_id: int) -> List[int]:
+def _search_cost_candidates(context_id: int) -> List[int]:
     for value in part_of_speech_data.values():
         if value.context_id == context_id:
             return value.cost_candidates
     raise HTTPException(status_code=422, detail="品詞IDが不正です")
 
 
-def cost2priority(context_id: int, cost: conint(ge=-32768, le=32767)) -> int:
-    cost_candidates = search_cost_candidates(context_id)
+def _cost2priority(context_id: int, cost: conint(ge=-32768, le=32767)) -> int:
+    cost_candidates = _search_cost_candidates(context_id)
     # cost_candidatesの中にある値で最も近い値を元にpriorityを返す
     # 参考: https://qiita.com/Krypf/items/2eada91c37161d17621d
     # この関数とpriority2cost関数によって、辞書ファイルのcostを操作しても最も近いpriorityのcostに上書きされる
     return MAX_PRIORITY - np.argmin(np.abs(np.array(cost_candidates) - cost))
 
 
-def priority2cost(
+def _priority2cost(
     context_id: int, priority: conint(ge=MIN_PRIORITY, le=MAX_PRIORITY)
 ) -> int:
-    cost_candidates = search_cost_candidates(context_id)
+    cost_candidates = _search_cost_candidates(context_id)
     return cost_candidates[MAX_PRIORITY - priority]

From 6e74fbc98ea9ebbb02ce0ad5beeae921d5bd8d07 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Thu, 14 Dec 2023 01:58:46 +0900
Subject: [PATCH 017/177] =?UTF-8?q?=E8=BF=BD=E5=8A=A0:=20=E8=BE=9E?=
 =?UTF-8?q?=E6=9B=B8=E3=82=A4=E3=83=B3=E3=83=9D=E3=83=BC=E3=83=88=E3=83=BB?=
 =?UTF-8?q?=E3=82=A8=E3=82=AF=E3=82=B9=E3=83=9D=E3=83=BC=E3=83=88=E3=82=AC?=
 =?UTF-8?q?=E3=82=A4=E3=83=89=20(#861)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 README.md | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 5f4594717..b667f22e5 100644
--- a/README.md
+++ b/README.md
@@ -196,6 +196,14 @@ word_uuid="cce59b5f-86ab-42b9-bb75-9fd3407f1e2d"
 curl -s -X DELETE "127.0.0.1:50021/user_dict_word/$word_uuid"
 ```
 
+#### 辞書のインポート&エクスポート
+
+エンジンの[設定ページ](http://127.0.0.1:50021/setting)内の「ユーザー辞書のエクスポート&インポート」節で、ユーザー辞書のインポート&エクスポートが可能です。
+
+他にも API でユーザー辞書のインポート&エクスポートが可能です。  
+インポートには `POST /import_user_dict`、エクスポートには `GET /user_dict` を利用します。  
+引数等の詳細は API ドキュメントをご覧ください。
+
 ### プリセット機能について
 
 `presets.yaml`を編集することで話者や話速などのプリセットを使うことができます。
@@ -551,14 +559,6 @@ poetry export --without-hashes --with license -o requirements-license.txt
 - LGPL: OK （コアと動的分離されているため）
 - GPL: NG （全関連コードの公開が必要なため）
 
-### ユーザー辞書の更新について
-
-以下のコマンドで openjtalk のユーザー辞書をコンパイルできます。
-
-```bash
-python -c "import pyopenjtalk; pyopenjtalk.create_user_dict('default.csv','user.dic')"
-```
-
 ### マルチエンジン機能に関して
 
 VOICEVOX エディターでは、複数のエンジンを同時に起動することができます。

From 3809c027cd15e334fd8a5fef674431c5cc20d82f Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Thu, 14 Dec 2023 03:33:46 +0900
Subject: [PATCH 018/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E3=83=89?=
 =?UTF-8?q?=E3=82=AD=E3=83=A5=E3=83=A1=E3=83=B3=E3=83=88=E7=94=9F=E6=88=90?=
 =?UTF-8?q?=E3=81=AE=20`build=5Futil`=20=E7=A7=BB=E6=A4=8D=20(#866)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/upload-gh-pages.yml   | 2 +-
 README.md                               | 2 +-
 make_docs.py => build_util/make_docs.py | 0
 3 files changed, 2 insertions(+), 2 deletions(-)
 rename make_docs.py => build_util/make_docs.py (100%)

diff --git a/.github/workflows/upload-gh-pages.yml b/.github/workflows/upload-gh-pages.yml
index 9e78d0a1b..e545c0bf6 100644
--- a/.github/workflows/upload-gh-pages.yml
+++ b/.github/workflows/upload-gh-pages.yml
@@ -34,7 +34,7 @@ jobs:
 
       - name: Make documents
         run: |
-          python make_docs.py
+          PYTHONPATH=. python build_util/make_docs.py
 
       - name: Deploy to GitHub Pages
         uses: peaceiris/actions-gh-pages@v3
diff --git a/README.md b/README.md
index b667f22e5..20267570c 100644
--- a/README.md
+++ b/README.md
@@ -612,7 +612,7 @@ VOICEVOX エディターにうまく読み込ませられないときは、エ
 次のコマンドで API ドキュメントを手動で作成することができます。
 
 ```bash
-python make_docs.py
+PYTHONPATH=. python build_util/make_docs.py
 ```
 
 ### GitHub Actions
diff --git a/make_docs.py b/build_util/make_docs.py
similarity index 100%
rename from make_docs.py
rename to build_util/make_docs.py

From 0887b07ef20b59aa0ac6b12ed8dc4c17b800f90c Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Thu, 14 Dec 2023 03:53:51 +0900
Subject: [PATCH 019/177] =?UTF-8?q?=E4=BF=AE=E6=AD=A3:=20tag=20=E4=BB=98?=
 =?UTF-8?q?=E3=81=91=E7=9B=B4=E3=81=97=E3=81=AB=E3=82=88=E3=82=8B=20releas?=
 =?UTF-8?q?e=20latest-dev=20=E3=81=AE=E6=9B=B4=E6=96=B0=20(#856)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 .github/workflows/build.yml | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 9f88c238e..218efd2c4 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -634,6 +634,17 @@ jobs:
             ${{ steps.vars.outputs.package_name }}.vvpp.txt
           commit: ${{ github.sha }}
 
+  update-tag-to-current-commit:
+    if: needs.config.outputs.version != ''
+    needs: [config, build-and-upload]
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Change tag to this commit for refreshing the release # c.f. voicevox_engine#854
+        run: |
+          git tag -f ${{ needs.config.outputs.version }}
+          git push -f --tag
+
   run-release-test-workflow:
     if: needs.config.outputs.version != ''
     needs: [config, build-and-upload]

From dbeda9da871abec5830a3e9994c6c0a53938105e Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Thu, 14 Dec 2023 04:01:52 +0900
Subject: [PATCH 020/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20Query=E5=85=A8?=
 =?UTF-8?q?=E4=BD=93=E3=83=91=E3=83=A9=E3=83=A1=E3=83=BC=E3=82=BF=E9=81=A9?=
 =?UTF-8?q?=E7=94=A8=E3=81=AE=E7=A7=BB=E6=A4=8D=20(#840)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 test/test_synthesis_engine.py                 | 20 ++++++++---------
 .../synthesis_engine/synthesis_engine.py      | 22 +++++++------------
 2 files changed, 18 insertions(+), 24 deletions(-)

diff --git a/test/test_synthesis_engine.py b/test/test_synthesis_engine.py
index bdb6de486..b64298417 100644
--- a/test/test_synthesis_engine.py
+++ b/test/test_synthesis_engine.py
@@ -332,7 +332,6 @@ def test_apply_output_stereo():
 def test_calc_frame_per_phoneme():
     """Test `calc_frame_per_phoneme`."""
     # Inputs
-    query = _gen_query(speedScale=2.0)
     moras = [
         _gen_mora("　", None, None, "　", 2 * 0.01067, 0.0),  # 0.01067 [sec/frame]
         _gen_mora("コ", "k", 2 * 0.01067, "o", 4 * 0.01067, 0.0),
@@ -345,11 +344,11 @@ def test_calc_frame_per_phoneme():
 
     # Expects
     #                        Pre k  o  N pau h  i  h  O Pst
-    true_frame_per_phoneme = [1, 1, 2, 2, 1, 1, 2, 2, 1, 3]
+    true_frame_per_phoneme = [2, 2, 4, 4, 2, 2, 4, 4, 2, 6]
     true_frame_per_phoneme = numpy.array(true_frame_per_phoneme, dtype=numpy.int32)
 
     # Outputs
-    frame_per_phoneme = calc_frame_per_phoneme(query, moras)
+    frame_per_phoneme = calc_frame_per_phoneme(moras)
 
     assert numpy.array_equal(frame_per_phoneme, true_frame_per_phoneme)
 
@@ -381,7 +380,6 @@ def test_calc_frame_per_mora():
 def test_calc_frame_pitch():
     """Test `test_calc_frame_pitch`."""
     # Inputs
-    query = _gen_query(pitchScale=2.0, intonationScale=0.5)
     moras = [
         _gen_mora("　", None, None, "　", 1 * 0.01067, 0.0),
         _gen_mora("コ", "k", 1 * 0.01067, "o", 2 * 0.01067, 50.0),
@@ -392,17 +390,16 @@ def test_calc_frame_pitch():
         _gen_mora("　", None, None, "　", 3 * 0.01067, 0.0),
     ]
 
-    # Expects - x4 value scaled -> mean=300 var x0.5 intonation scaling
     #           pau   ko     ko     ko      N      N
-    true1_f0 = [0.0, 250.0, 250.0, 250.0, 250.0, 250.0]
+    true1_f0 = [0.0, 50.0, 50.0, 50.0, 50.0, 50.0]
     #           pau   hi     hi     hi
-    true2_f0 = [0.0, 400.0, 400.0, 400.0]
+    true2_f0 = [0.0, 125.0, 125.0, 125.0]
     #           hO   hO   hO   paw  paw  paw
     true3_f0 = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
     true_f0 = numpy.array(true1_f0 + true2_f0 + true3_f0, dtype=numpy.float32)
 
     # Outputs
-    f0 = calc_frame_pitch(query, moras)
+    f0 = calc_frame_pitch(moras)
 
     assert numpy.array_equal(f0, true_f0)
 
@@ -480,8 +477,11 @@ def test_feat_to_framescale():
 
     # Outputs
     flatten_moras = apply_prepost_silence(flatten_moras, query)
-    frame_per_phoneme = calc_frame_per_phoneme(query, flatten_moras)
-    f0 = calc_frame_pitch(query, flatten_moras)
+    flatten_moras = apply_speed_scale(flatten_moras, query)
+    flatten_moras = apply_pitch_scale(flatten_moras, query)
+    flatten_moras = apply_intonation_scale(flatten_moras, query)
+    frame_per_phoneme = calc_frame_per_phoneme(flatten_moras)
+    f0 = calc_frame_pitch(flatten_moras)
     frame_phoneme = calc_frame_phoneme(phoneme_data_list, frame_per_phoneme)
 
     assert numpy.array_equal(frame_phoneme, true_frame_phoneme)
diff --git a/voicevox_engine/synthesis_engine/synthesis_engine.py b/voicevox_engine/synthesis_engine/synthesis_engine.py
index da74a1d78..19a29432f 100644
--- a/voicevox_engine/synthesis_engine/synthesis_engine.py
+++ b/voicevox_engine/synthesis_engine/synthesis_engine.py
@@ -155,13 +155,11 @@ def apply_speed_scale(moras: list[Mora], query: AudioQuery) -> list[Mora]:
     return moras
 
 
-def calc_frame_per_phoneme(query: AudioQuery, moras: List[Mora]):
+def calc_frame_per_phoneme(moras: List[Mora]):
     """
     音素あたりのフレーム長を算出
     Parameters
     ----------
-    query : AudioQuery
-        音声合成クエリ
     moras : List[Mora]
         モーラ列
     Returns
@@ -169,9 +167,6 @@ def calc_frame_per_phoneme(query: AudioQuery, moras: List[Mora]):
     frame_per_phoneme : NDArray[]
         音素あたりのフレーム長。端数丸め。
     """
-    # Apply: グローバル特徴量による補正（話速）
-    moras = apply_speed_scale(moras, query)
-
     frame_per_phoneme: list[ndarray] = []
     for mora in moras:
         if mora.consonant:
@@ -247,13 +242,11 @@ def apply_intonation_scale(moras: list[Mora], query: AudioQuery) -> list[Mora]:
     return moras
 
 
-def calc_frame_pitch(query: AudioQuery, moras: list[Mora]) -> ndarray:
+def calc_frame_pitch(moras: list[Mora]) -> ndarray:
     """
     フレームごとのピッチの生成
     Parameters
     ----------
-    query : AudioQuery
-        音声合成クエリ
     moras : List[Mora]
         モーラ列
     Returns
@@ -261,9 +254,6 @@ def calc_frame_pitch(query: AudioQuery, moras: list[Mora]) -> ndarray:
     frame_f0 : NDArray[]
         フレームごとの基本周波数系列
     """
-    moras = apply_pitch_scale(moras, query)
-    moras = apply_intonation_scale(moras, query)
-
     # TODO: Better function name (c.f. VOICEVOX/voicevox_engine#790)
     # モーラごとの基本周波数
     f0 = numpy.array([mora.pitch for mora in moras], dtype=numpy.float32)
@@ -620,8 +610,12 @@ def _synthesis_impl(self, query: AudioQuery, style_id: int):
         flatten_moras, phoneme_data_list = pre_process(query.accent_phrases)
 
         flatten_moras = apply_prepost_silence(flatten_moras, query)
-        frame_per_phoneme = calc_frame_per_phoneme(query, flatten_moras)
-        f0 = calc_frame_pitch(query, flatten_moras)
+        flatten_moras = apply_speed_scale(flatten_moras, query)
+        flatten_moras = apply_pitch_scale(flatten_moras, query)
+        flatten_moras = apply_intonation_scale(flatten_moras, query)
+
+        frame_per_phoneme = calc_frame_per_phoneme(flatten_moras)
+        f0 = calc_frame_pitch(flatten_moras)
         phoneme = calc_frame_phoneme(phoneme_data_list, frame_per_phoneme)
 
         # 今まで生成された情報をdecode_forwardにかけ、推論器によって音声波形を生成する

From 6f47a79c7c619cac177c6c78a80ccea7a05c33cd Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Thu, 14 Dec 2023 04:08:23 +0900
Subject: [PATCH 021/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`speaker=5Finfo`?=
 =?UTF-8?q?=20API=E5=86=85=E9=83=A8=E5=AE=9F=E8=A3=85=E5=8D=98=E7=B4=94?=
 =?UTF-8?q?=E5=8C=96=20(#849)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 run.py | 69 ++++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 46 insertions(+), 23 deletions(-)

diff --git a/run.py b/run.py
index 85ae2002a..fad40b3b5 100644
--- a/run.py
+++ b/run.py
@@ -785,6 +785,31 @@ def speaker_info(
         -------
         ret_data: SpeakerInfo
         """
+
+        # エンジンに含まれる話者メタ情報は、次のディレクトリ構造に従わなければならない：
+        # {root_dir}/
+        #   speaker_info/
+        #       {speaker_uuid_0}/
+        #           policy.md
+        #           portrait.png
+        #           icons/
+        #               {id_0}.png
+        #               {id_1}.png
+        #               ...
+        #           portraits/
+        #               {id_0}.png
+        #               {id_1}.png
+        #               ...
+        #           voice_samples/
+        #               {id_0}_001.wav
+        #               {id_0}_002.wav
+        #               {id_0}_003.wav
+        #               {id_1}_001.wav
+        #               ...
+        #       {speaker_uuid_1}/
+        #           ...
+
+        # 該当話者の検索
         speakers = json.loads(get_engine(core_version).speakers)
         for i in range(len(speakers)):
             if speakers[i]["speaker_uuid"] == speaker_uuid:
@@ -794,35 +819,32 @@ def speaker_info(
             raise HTTPException(status_code=404, detail="該当する話者が見つかりません")
 
         try:
-            policy = (root_dir / f"speaker_info/{speaker_uuid}/policy.md").read_text(
-                "utf-8"
-            )
-            portrait = b64encode_str(
-                (root_dir / f"speaker_info/{speaker_uuid}/portrait.png").read_bytes()
-            )
+            speaker_path = root_dir / "speaker_info" / speaker_uuid
+            # 話者情報の取得
+            # speaker policy
+            policy_path = speaker_path / "policy.md"
+            policy = policy_path.read_text("utf-8")
+            # speaker portrait
+            portrait_path = speaker_path / "portrait.png"
+            portrait = b64encode_str(portrait_path.read_bytes())
+            # スタイル情報の取得
             style_infos = []
             for style in speaker["styles"]:
                 id = style["id"]
-                icon = b64encode_str(
-                    (
-                        root_dir / f"speaker_info/{speaker_uuid}/icons/{id}.png"
-                    ).read_bytes()
-                )
-                style_portrait_path = (
-                    root_dir / f"speaker_info/{speaker_uuid}/portraits/{id}.png"
-                )
-                style_portrait = (
-                    b64encode_str(style_portrait_path.read_bytes())
-                    if style_portrait_path.exists()
-                    else None
-                )
+                # style icon
+                style_icon_path = speaker_path / "icons" / f"{id}.png"
+                icon = b64encode_str(style_icon_path.read_bytes())
+                # style portrait
+                style_portrait_path = speaker_path / "portraits" / f"{id}.png"
+                style_portrait = None
+                if style_portrait_path.exists():
+                    style_portrait = b64encode_str(style_portrait_path.read_bytes())
+                # voice samples
                 voice_samples = [
                     b64encode_str(
                         (
-                            root_dir
-                            / "speaker_info/{}/voice_samples/{}_{}.wav".format(
-                                speaker_uuid, id, str(j + 1).zfill(3)
-                            )
+                            speaker_path
+                            / "voice_samples/{}_{}.wav".format(id, str(j + 1).zfill(3))
                         ).read_bytes()
                     )
                     for j in range(3)
@@ -842,6 +864,7 @@ def speaker_info(
             raise HTTPException(status_code=500, detail="追加情報が見つかりませんでした")
 
         ret_data = {"policy": policy, "portrait": portrait, "style_infos": style_infos}
+
         return ret_data
 
     @app.get(

From 0769a4d58da7adf00605241ada738419f4350af6 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Thu, 14 Dec 2023 04:53:57 +0900
Subject: [PATCH 022/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`pre=5Fprocess`?=
 =?UTF-8?q?=20=E3=81=AE=E7=B4=B0=E5=88=86=E5=8C=96=20(#851)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Refactor: モーラ・音素抽出の関数化と簡略化

* Refactor: クエリ処理テストの範囲拡大

* Refactor: 合成時の `pre_process` 置き換え
---
 test/test_synthesis_engine.py                 | 53 ++++++++++---
 .../synthesis_engine/synthesis_engine.py      | 78 ++++++++++---------
 2 files changed, 85 insertions(+), 46 deletions(-)

diff --git a/test/test_synthesis_engine.py b/test/test_synthesis_engine.py
index b64298417..66192d32e 100644
--- a/test/test_synthesis_engine.py
+++ b/test/test_synthesis_engine.py
@@ -28,6 +28,7 @@
     pre_process,
     split_mora,
     to_flatten_moras,
+    to_flatten_phonemes,
     unvoiced_mora_phoneme_list,
 )
 
@@ -180,6 +181,24 @@ def _gen_mora(
     )
 
 
+def test_to_flatten_phonemes():
+    """Test `to_flatten_phonemes`."""
+    # Inputs
+    moras = [
+        _gen_mora("　", None, None, "sil", 2 * 0.01067, 0.0),
+        _gen_mora("ヒ", "h", 2 * 0.01067, "i", 4 * 0.01067, 100.0),
+        _gen_mora("　", None, None, "sil", 6 * 0.01067, 0.0),
+    ]
+
+    # Expects
+    true_phonemes = ["pau", "h", "i", "pau"]
+
+    # Outputs
+    phonemes = list(map(lambda p: p.phoneme, to_flatten_phonemes(moras)))
+
+    assert true_phonemes == phonemes
+
+
 def test_apply_prepost_silence():
     """Test `apply_prepost_silence`."""
     # Inputs
@@ -430,22 +449,32 @@ def test_calc_frame_phoneme():
 def test_feat_to_framescale():
     """Test Mora/Phonemefeature-to-framescaleFeature pipeline."""
     # Inputs
+    accent_phrases = [
+        AccentPhrase(
+            moras=[
+                _gen_mora("コ", "k", 2 * 0.01067, "o", 4 * 0.01067, 50.0),
+                _gen_mora("ン", None, None, "N", 4 * 0.01067, 50.0),
+            ],
+            accent=1,
+            pause_mora=_gen_mora("、", None, None, "pau", 2 * 0.01067, 0.0),
+        ),
+        AccentPhrase(
+            moras=[
+                _gen_mora("ヒ", "h", 2 * 0.01067, "i", 4 * 0.01067, 125.0),
+                _gen_mora("ホ", "h", 4 * 0.01067, "O", 2 * 0.01067, 0.0),
+            ],
+            accent=1,
+            pause_mora=None,
+        ),
+    ]
     query = _gen_query(
+        accent_phrases=accent_phrases,
         speedScale=2.0,
         pitchScale=2.0,
         intonationScale=0.5,
         prePhonemeLength=2 * 0.01067,
         postPhonemeLength=6 * 0.01067,
     )
-    flatten_moras = [
-        _gen_mora("コ", "k", 2 * 0.01067, "o", 4 * 0.01067, 50.0),
-        _gen_mora("ン", None, None, "N", 4 * 0.01067, 50.0),
-        _gen_mora("、", None, None, "pau", 2 * 0.01067, 0.0),
-        _gen_mora("ヒ", "h", 2 * 0.01067, "i", 4 * 0.01067, 125.0),
-        _gen_mora("ホ", "h", 4 * 0.01067, "O", 2 * 0.01067, 0.0),
-    ]
-    phoneme_str = "pau k o N pau h i h O pau"
-    phoneme_data_list = [OjtPhoneme(p) for p in phoneme_str.split()]
 
     # Expects
     # frame_per_phoneme
@@ -473,13 +502,15 @@ def test_feat_to_framescale():
     true3_f0 = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
     true_f0 = numpy.array(true1_f0 + true2_f0 + true3_f0, dtype=numpy.float32)
 
-    assert true_frame_per_phoneme.shape[0] == len(phoneme_data_list), "Prerequisites"
-
     # Outputs
+    flatten_moras = to_flatten_moras(query.accent_phrases)
     flatten_moras = apply_prepost_silence(flatten_moras, query)
     flatten_moras = apply_speed_scale(flatten_moras, query)
     flatten_moras = apply_pitch_scale(flatten_moras, query)
     flatten_moras = apply_intonation_scale(flatten_moras, query)
+
+    phoneme_data_list = to_flatten_phonemes(flatten_moras)
+
     frame_per_phoneme = calc_frame_per_phoneme(flatten_moras)
     f0 = calc_frame_pitch(flatten_moras)
     frame_phoneme = calc_frame_phoneme(phoneme_data_list, frame_per_phoneme)
diff --git a/voicevox_engine/synthesis_engine/synthesis_engine.py b/voicevox_engine/synthesis_engine/synthesis_engine.py
index 19a29432f..181273fe3 100644
--- a/voicevox_engine/synthesis_engine/synthesis_engine.py
+++ b/voicevox_engine/synthesis_engine/synthesis_engine.py
@@ -1,7 +1,6 @@
 import math
 import threading
-from itertools import chain
-from typing import List, Optional, Tuple
+from typing import List, Optional
 
 import numpy
 from numpy import ndarray
@@ -17,30 +16,44 @@
 
 
 # TODO: move mora utility to mora module
-def to_flatten_moras(accent_phrases: List[AccentPhrase]) -> List[Mora]:
+def to_flatten_moras(accent_phrases: list[AccentPhrase]) -> list[Mora]:
     """
-    accent_phrasesに含まれるMora(とpause_moraがあればそれも)を
-    すべて一つのリストに結合する
+    Extract the moras contained in a sequence of accent phrases.
     Parameters
     ----------
-    accent_phrases : List[AccentPhrase]
-        AccentPhraseのリスト
+    accent_phrases : list[AccentPhrase]
+        Accent phrase sequence
     Returns
     -------
-    moras : List[Mora]
-        結合されたMoraのリストを返す
+    moras : list[Mora]
+        Mora sequence, including each phrase's pause mora when present.
     """
-    return list(
-        chain.from_iterable(
-            accent_phrase.moras
-            + (
-                [accent_phrase.pause_mora]
-                if accent_phrase.pause_mora is not None
-                else []
-            )
-            for accent_phrase in accent_phrases
-        )
-    )
+    flattened: list[Mora] = []
+    for phrase in accent_phrases:
+        flattened.extend(phrase.moras)
+        if phrase.pause_mora:
+            flattened.append(phrase.pause_mora)
+    return flattened
+
+
+def to_flatten_phonemes(moras: list[Mora]) -> list[OjtPhoneme]:
+    """
+    Extract the phonemes contained in a mora sequence.
+    Parameters
+    ----------
+    moras : list[Mora]
+        Mora sequence
+    Returns
+    -------
+    phonemes : list[OjtPhoneme]
+        Phoneme sequence: each mora's consonant (if truthy), then its vowel
+    """
+    flattened: list[OjtPhoneme] = []
+    for unit in moras:
+        if unit.consonant:
+            flattened.append(OjtPhoneme(unit.consonant))
+        flattened.append(OjtPhoneme(unit.vowel))
+    return flattened
 
 
 def split_mora(phoneme_list: List[OjtPhoneme]):
@@ -80,8 +93,8 @@ def split_mora(phoneme_list: List[OjtPhoneme]):
 
 
 def pre_process(
-    accent_phrases: List[AccentPhrase],
-) -> Tuple[List[Mora], List[OjtPhoneme]]:
+    accent_phrases: list[AccentPhrase],
+) -> tuple[list[Mora], list[OjtPhoneme]]:
     """
     AccentPhraseモデルのリストを整形し、処理に必要なデータの原型を作り出す
     Parameters
@@ -92,21 +105,16 @@ def pre_process(
     -------
     flatten_moras : List[Mora]
         モーラ列（前後の無音含まない）
-    phoneme_data_list : List[OjtPhoneme]
+    phonemes : List[OjtPhoneme]
         音素列（前後の無音含む）
     """
     flatten_moras = to_flatten_moras(accent_phrases)
+    phonemes = to_flatten_phonemes(flatten_moras)
 
-    phoneme_each_mora = [
-        ([mora.consonant] if mora.consonant is not None else []) + [mora.vowel]
-        for mora in flatten_moras
-    ]
-    phoneme_str_list = list(chain.from_iterable(phoneme_each_mora))
-    phoneme_str_list = ["pau"] + phoneme_str_list + ["pau"]
-
-    phoneme_data_list = list(map(OjtPhoneme, phoneme_str_list))
+    # 前後無音の追加
+    phonemes = [OjtPhoneme("pau")] + phonemes + [OjtPhoneme("pau")]
 
-    return flatten_moras, phoneme_data_list
+    return flatten_moras, phonemes
 
 
 def generate_silence_mora(length: float) -> Mora:
@@ -605,15 +613,15 @@ def _synthesis_impl(self, query: AudioQuery, style_id: int):
         """
         # モデルがロードされていない場合はロードする
         self.initialize_style_id_synthesis(style_id, skip_reinit=True)
-        # phoneme
-        # AccentPhraseをすべてMoraおよびOjtPhonemeの形に分解し、処理可能な形にする
-        flatten_moras, phoneme_data_list = pre_process(query.accent_phrases)
 
+        flatten_moras = to_flatten_moras(query.accent_phrases)
         flatten_moras = apply_prepost_silence(flatten_moras, query)
         flatten_moras = apply_speed_scale(flatten_moras, query)
         flatten_moras = apply_pitch_scale(flatten_moras, query)
         flatten_moras = apply_intonation_scale(flatten_moras, query)
 
+        phoneme_data_list = to_flatten_phonemes(flatten_moras)
+
         frame_per_phoneme = calc_frame_per_phoneme(flatten_moras)
         f0 = calc_frame_pitch(flatten_moras)
         phoneme = calc_frame_phoneme(phoneme_data_list, frame_per_phoneme)

From 5580eeecd8a1c2da2091fff0781baab260cf4429 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Fri, 15 Dec 2023 23:59:46 +0900
Subject: [PATCH 023/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20TTS=E7=B3=BB?=
 =?UTF-8?q?=E3=83=87=E3=82=A3=E3=83=AC=E3=82=AF=E3=83=88=E3=83=AA=E6=A7=8B?=
 =?UTF-8?q?=E9=80=A0=E3=81=AE=E6=94=B9=E5=96=84=20(#867)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 run.py                                               |  4 ++--
 test/e2e/conftest.py                                 |  2 +-
 test/test_acoustic_feature_extractor.py              |  2 +-
 test/test_full_context_label.py                      |  2 +-
 test/test_kana_parser.py                             |  4 ++--
 test/test_mock_synthesis_engine.py                   |  2 +-
 test/test_mora_list.py                               |  2 +-
 test/test_mora_to_text.py                            |  2 +-
 test/test_synthesis_engine.py                        |  6 +++---
 test/test_synthesis_engine_base.py                   |  2 +-
 test/test_user_dict_model.py                         |  2 +-
 voicevox_engine/cancellable_engine.py                |  2 +-
 .../{synthesis_engine => }/core_wrapper.py           |  0
 voicevox_engine/dev/synthesis_engine/mock.py         |  4 ++--
 voicevox_engine/metas/MetasStore.py                  |  4 +---
 voicevox_engine/morphing.py                          |  2 +-
 voicevox_engine/synthesis_engine/__init__.py         | 12 ------------
 voicevox_engine/tts_pipeline/__init__.py             | 12 ++++++++++++
 .../{ => tts_pipeline}/acoustic_feature_extractor.py |  0
 .../{ => tts_pipeline}/full_context_label.py         |  0
 voicevox_engine/{ => tts_pipeline}/kana_parser.py    |  2 +-
 .../make_tts_engines.py}                             |  4 ++--
 voicevox_engine/{ => tts_pipeline}/mora_list.py      |  0
 .../tts_engine.py}                                   |  6 +++---
 .../tts_engine_base.py}                              |  6 +++---
 25 files changed, 41 insertions(+), 43 deletions(-)
 rename voicevox_engine/{synthesis_engine => }/core_wrapper.py (100%)
 delete mode 100644 voicevox_engine/synthesis_engine/__init__.py
 create mode 100644 voicevox_engine/tts_pipeline/__init__.py
 rename voicevox_engine/{ => tts_pipeline}/acoustic_feature_extractor.py (100%)
 rename voicevox_engine/{ => tts_pipeline}/full_context_label.py (100%)
 rename voicevox_engine/{ => tts_pipeline}/kana_parser.py (99%)
 rename voicevox_engine/{synthesis_engine/make_synthesis_engines.py => tts_pipeline/make_tts_engines.py} (97%)
 rename voicevox_engine/{ => tts_pipeline}/mora_list.py (100%)
 rename voicevox_engine/{synthesis_engine/synthesis_engine.py => tts_pipeline/tts_engine.py} (99%)
 rename voicevox_engine/{synthesis_engine/synthesis_engine_base.py => tts_pipeline/tts_engine_base.py} (98%)

diff --git a/run.py b/run.py
index fad40b3b5..ec38fd41d 100644
--- a/run.py
+++ b/run.py
@@ -30,7 +30,6 @@
 from voicevox_engine.cancellable_engine import CancellableEngine
 from voicevox_engine.engine_manifest import EngineManifestLoader
 from voicevox_engine.engine_manifest.EngineManifest import EngineManifest
-from voicevox_engine.kana_parser import create_kana, parse_kana
 from voicevox_engine.library_manager import LibraryManager
 from voicevox_engine.metas.MetasStore import MetasStore, construct_lookup
 from voicevox_engine.model import (
@@ -66,7 +65,8 @@
     Setting,
     SettingLoader,
 )
-from voicevox_engine.synthesis_engine import SynthesisEngineBase, make_synthesis_engines
+from voicevox_engine.tts_pipeline import SynthesisEngineBase, make_synthesis_engines
+from voicevox_engine.tts_pipeline.kana_parser import create_kana, parse_kana
 from voicevox_engine.user_dict import (
     apply_word,
     delete_word,
diff --git a/test/e2e/conftest.py b/test/e2e/conftest.py
index b6eab18ae..9475d3b05 100644
--- a/test/e2e/conftest.py
+++ b/test/e2e/conftest.py
@@ -6,7 +6,7 @@
 
 from voicevox_engine.preset import PresetManager
 from voicevox_engine.setting import SettingLoader
-from voicevox_engine.synthesis_engine import make_synthesis_engines
+from voicevox_engine.tts_pipeline import make_synthesis_engines
 from voicevox_engine.utility.core_version_utility import get_latest_core_version
 
 
diff --git a/test/test_acoustic_feature_extractor.py b/test/test_acoustic_feature_extractor.py
index 24c70d284..9e2a4867c 100644
--- a/test/test_acoustic_feature_extractor.py
+++ b/test/test_acoustic_feature_extractor.py
@@ -1,6 +1,6 @@
 from unittest import TestCase
 
-from voicevox_engine.acoustic_feature_extractor import OjtPhoneme
+from voicevox_engine.tts_pipeline.acoustic_feature_extractor import OjtPhoneme
 
 TRUE_NUM_PHONEME = 45
 
diff --git a/test/test_full_context_label.py b/test/test_full_context_label.py
index 7cdde34f4..0c9ce3ee0 100644
--- a/test/test_full_context_label.py
+++ b/test/test_full_context_label.py
@@ -2,7 +2,7 @@
 from itertools import chain
 from unittest import TestCase
 
-from voicevox_engine.full_context_label import (
+from voicevox_engine.tts_pipeline.full_context_label import (
     AccentPhrase,
     BreathGroup,
     Mora,
diff --git a/test/test_kana_parser.py b/test/test_kana_parser.py
index ef800b600..3e4c19a97 100644
--- a/test/test_kana_parser.py
+++ b/test/test_kana_parser.py
@@ -1,9 +1,9 @@
 from typing import List
 from unittest import TestCase
 
-from voicevox_engine import kana_parser
-from voicevox_engine.kana_parser import create_kana
 from voicevox_engine.model import AccentPhrase, Mora, ParseKanaError, ParseKanaErrorCode
+from voicevox_engine.tts_pipeline import kana_parser
+from voicevox_engine.tts_pipeline.kana_parser import create_kana
 
 
 def parse_kana(text: str) -> List[AccentPhrase]:
diff --git a/test/test_mock_synthesis_engine.py b/test/test_mock_synthesis_engine.py
index ce6c59825..27fee31c1 100644
--- a/test/test_mock_synthesis_engine.py
+++ b/test/test_mock_synthesis_engine.py
@@ -1,8 +1,8 @@
 from unittest import TestCase
 
 from voicevox_engine.dev.synthesis_engine import MockSynthesisEngine
-from voicevox_engine.kana_parser import create_kana
 from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
+from voicevox_engine.tts_pipeline.kana_parser import create_kana
 
 
 class TestMockSynthesisEngine(TestCase):
diff --git a/test/test_mora_list.py b/test/test_mora_list.py
index 25b287fa0..a2928205a 100644
--- a/test/test_mora_list.py
+++ b/test/test_mora_list.py
@@ -1,6 +1,6 @@
 from unittest import TestCase
 
-from voicevox_engine.mora_list import openjtalk_mora2text
+from voicevox_engine.tts_pipeline.mora_list import openjtalk_mora2text
 
 
 class TestOpenJTalkMoraList(TestCase):
diff --git a/test/test_mora_to_text.py b/test/test_mora_to_text.py
index 691681dd1..f8f531008 100644
--- a/test/test_mora_to_text.py
+++ b/test/test_mora_to_text.py
@@ -1,7 +1,7 @@
 from unittest import TestCase
 
 # TODO: import from voicevox_engine.synthesis_engine.mora
-from voicevox_engine.synthesis_engine.synthesis_engine_base import mora_to_text
+from voicevox_engine.tts_pipeline.tts_engine_base import mora_to_text
 
 
 class TestMoraToText(TestCase):
diff --git a/test/test_synthesis_engine.py b/test/test_synthesis_engine.py
index 66192d32e..00730bb5d 100644
--- a/test/test_synthesis_engine.py
+++ b/test/test_synthesis_engine.py
@@ -7,12 +7,12 @@
 
 import numpy
 
-from voicevox_engine.acoustic_feature_extractor import OjtPhoneme
 from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
-from voicevox_engine.synthesis_engine import SynthesisEngine
+from voicevox_engine.tts_pipeline import SynthesisEngine
+from voicevox_engine.tts_pipeline.acoustic_feature_extractor import OjtPhoneme
 
 # TODO: import from voicevox_engine.synthesis_engine.mora
-from voicevox_engine.synthesis_engine.synthesis_engine import (
+from voicevox_engine.tts_pipeline.tts_engine import (
     apply_intonation_scale,
     apply_output_sampling_rate,
     apply_output_stereo,
diff --git a/test/test_synthesis_engine_base.py b/test/test_synthesis_engine_base.py
index c49dcbe01..7fa8fd676 100644
--- a/test/test_synthesis_engine_base.py
+++ b/test/test_synthesis_engine_base.py
@@ -5,7 +5,7 @@
 import numpy
 
 from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
-from voicevox_engine.synthesis_engine import SynthesisEngine
+from voicevox_engine.tts_pipeline import SynthesisEngine
 
 
 def yukarin_s_mock(length: int, phoneme_list: numpy.ndarray, style_id: numpy.ndarray):
diff --git a/test/test_user_dict_model.py b/test/test_user_dict_model.py
index 9a3a49021..646340c6c 100644
--- a/test/test_user_dict_model.py
+++ b/test/test_user_dict_model.py
@@ -3,8 +3,8 @@
 
 from pydantic import ValidationError
 
-from voicevox_engine.kana_parser import parse_kana
 from voicevox_engine.model import UserDictWord
+from voicevox_engine.tts_pipeline.kana_parser import parse_kana
 
 
 class TestUserDictWords(TestCase):
diff --git a/voicevox_engine/cancellable_engine.py b/voicevox_engine/cancellable_engine.py
index c473c3e4a..140a7f138 100644
--- a/voicevox_engine/cancellable_engine.py
+++ b/voicevox_engine/cancellable_engine.py
@@ -17,7 +17,7 @@
 from fastapi import HTTPException, Request
 
 from .model import AudioQuery
-from .synthesis_engine import make_synthesis_engines
+from .tts_pipeline import make_synthesis_engines
 from .utility import get_latest_core_version
 
 
diff --git a/voicevox_engine/synthesis_engine/core_wrapper.py b/voicevox_engine/core_wrapper.py
similarity index 100%
rename from voicevox_engine/synthesis_engine/core_wrapper.py
rename to voicevox_engine/core_wrapper.py
diff --git a/voicevox_engine/dev/synthesis_engine/mock.py b/voicevox_engine/dev/synthesis_engine/mock.py
index 1b6c4abeb..ec366b31b 100644
--- a/voicevox_engine/dev/synthesis_engine/mock.py
+++ b/voicevox_engine/dev/synthesis_engine/mock.py
@@ -6,8 +6,8 @@
 from soxr import resample
 
 from ...model import AccentPhrase, AudioQuery
-from ...synthesis_engine import SynthesisEngineBase
-from ...synthesis_engine.synthesis_engine import to_flatten_moras
+from ...tts_pipeline import SynthesisEngineBase
+from ...tts_pipeline.tts_engine import to_flatten_moras
 
 
 class MockSynthesisEngine(SynthesisEngineBase):
diff --git a/voicevox_engine/metas/MetasStore.py b/voicevox_engine/metas/MetasStore.py
index c8367e831..497b2723e 100644
--- a/voicevox_engine/metas/MetasStore.py
+++ b/voicevox_engine/metas/MetasStore.py
@@ -5,9 +5,7 @@
 from voicevox_engine.metas.Metas import CoreSpeaker, EngineSpeaker, Speaker, StyleInfo
 
 if TYPE_CHECKING:
-    from voicevox_engine.synthesis_engine.synthesis_engine_base import (
-        SynthesisEngineBase,
-    )
+    from voicevox_engine.tts_pipeline.tts_engine_base import SynthesisEngineBase
 
 
 class MetasStore:
diff --git a/voicevox_engine/morphing.py b/voicevox_engine/morphing.py
index 74c82fb7d..89a2498c3 100644
--- a/voicevox_engine/morphing.py
+++ b/voicevox_engine/morphing.py
@@ -10,7 +10,7 @@
 from .metas.Metas import Speaker, SpeakerSupportPermittedSynthesisMorphing, StyleInfo
 from .metas.MetasStore import construct_lookup
 from .model import AudioQuery, MorphableTargetInfo, StyleIdNotFoundError
-from .synthesis_engine import SynthesisEngine
+from .tts_pipeline import SynthesisEngine
 
 
 # FIXME: ndarray type hint, https://github.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder/blob/2b64f86197573497c685c785c6e0e743f407b63e/pyworld/pyworld.pyx#L398  # noqa
diff --git a/voicevox_engine/synthesis_engine/__init__.py b/voicevox_engine/synthesis_engine/__init__.py
deleted file mode 100644
index 3e7f6a1ef..000000000
--- a/voicevox_engine/synthesis_engine/__init__.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from .core_wrapper import CoreWrapper, load_runtime_lib
-from .make_synthesis_engines import make_synthesis_engines
-from .synthesis_engine import SynthesisEngine
-from .synthesis_engine_base import SynthesisEngineBase
-
-__all__ = [
-    "CoreWrapper",
-    "load_runtime_lib",
-    "make_synthesis_engines",
-    "SynthesisEngine",
-    "SynthesisEngineBase",
-]
diff --git a/voicevox_engine/tts_pipeline/__init__.py b/voicevox_engine/tts_pipeline/__init__.py
new file mode 100644
index 000000000..2fce842ba
--- /dev/null
+++ b/voicevox_engine/tts_pipeline/__init__.py
@@ -0,0 +1,12 @@
+from ..core_wrapper import CoreWrapper, load_runtime_lib
+from .make_tts_engines import make_synthesis_engines
+from .tts_engine import SynthesisEngine
+from .tts_engine_base import SynthesisEngineBase
+
+__all__ = [
+    "CoreWrapper",
+    "load_runtime_lib",
+    "make_synthesis_engines",
+    "SynthesisEngine",
+    "SynthesisEngineBase",
+]
diff --git a/voicevox_engine/acoustic_feature_extractor.py b/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
similarity index 100%
rename from voicevox_engine/acoustic_feature_extractor.py
rename to voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
diff --git a/voicevox_engine/full_context_label.py b/voicevox_engine/tts_pipeline/full_context_label.py
similarity index 100%
rename from voicevox_engine/full_context_label.py
rename to voicevox_engine/tts_pipeline/full_context_label.py
diff --git a/voicevox_engine/kana_parser.py b/voicevox_engine/tts_pipeline/kana_parser.py
similarity index 99%
rename from voicevox_engine/kana_parser.py
rename to voicevox_engine/tts_pipeline/kana_parser.py
index 14efb4672..430960156 100644
--- a/voicevox_engine/kana_parser.py
+++ b/voicevox_engine/tts_pipeline/kana_parser.py
@@ -5,7 +5,7 @@
 
 from typing import List, Optional
 
-from .model import AccentPhrase, Mora, ParseKanaError, ParseKanaErrorCode
+from ..model import AccentPhrase, Mora, ParseKanaError, ParseKanaErrorCode
 from .mora_list import openjtalk_text2mora
 
 _LOOP_LIMIT = 300
diff --git a/voicevox_engine/synthesis_engine/make_synthesis_engines.py b/voicevox_engine/tts_pipeline/make_tts_engines.py
similarity index 97%
rename from voicevox_engine/synthesis_engine/make_synthesis_engines.py
rename to voicevox_engine/tts_pipeline/make_tts_engines.py
index 848a601af..09183574a 100644
--- a/voicevox_engine/synthesis_engine/make_synthesis_engines.py
+++ b/voicevox_engine/tts_pipeline/make_tts_engines.py
@@ -3,9 +3,9 @@
 from pathlib import Path
 from typing import Dict, List, Optional
 
+from ..core_wrapper import CoreWrapper, load_runtime_lib
 from ..utility import engine_root, get_save_dir
-from .core_wrapper import CoreWrapper, load_runtime_lib
-from .synthesis_engine import SynthesisEngine, SynthesisEngineBase
+from .tts_engine import SynthesisEngine, SynthesisEngineBase
 
 
 def make_synthesis_engines(
diff --git a/voicevox_engine/mora_list.py b/voicevox_engine/tts_pipeline/mora_list.py
similarity index 100%
rename from voicevox_engine/mora_list.py
rename to voicevox_engine/tts_pipeline/mora_list.py
diff --git a/voicevox_engine/synthesis_engine/synthesis_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
similarity index 99%
rename from voicevox_engine/synthesis_engine/synthesis_engine.py
rename to voicevox_engine/tts_pipeline/tts_engine.py
index 181273fe3..372900c6f 100644
--- a/voicevox_engine/synthesis_engine/synthesis_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -6,10 +6,10 @@
 from numpy import ndarray
 from soxr import resample
 
-from ..acoustic_feature_extractor import OjtPhoneme
+from ..core_wrapper import CoreWrapper, OldCoreError
 from ..model import AccentPhrase, AudioQuery, Mora
-from .core_wrapper import CoreWrapper, OldCoreError
-from .synthesis_engine_base import SynthesisEngineBase
+from .acoustic_feature_extractor import OjtPhoneme
+from .tts_engine_base import SynthesisEngineBase
 
 unvoiced_mora_phoneme_list = ["A", "I", "U", "E", "O", "cl", "pau"]
 mora_phoneme_list = ["a", "i", "u", "e", "o", "N"] + unvoiced_mora_phoneme_list
diff --git a/voicevox_engine/synthesis_engine/synthesis_engine_base.py b/voicevox_engine/tts_pipeline/tts_engine_base.py
similarity index 98%
rename from voicevox_engine/synthesis_engine/synthesis_engine_base.py
rename to voicevox_engine/tts_pipeline/tts_engine_base.py
index 6a139a830..502580f8e 100644
--- a/voicevox_engine/synthesis_engine/synthesis_engine_base.py
+++ b/voicevox_engine/tts_pipeline/tts_engine_base.py
@@ -4,10 +4,10 @@
 
 import numpy as np
 
-from .. import full_context_label
-from ..full_context_label import extract_full_context_label
 from ..model import AccentPhrase, AudioQuery, Mora
-from ..mora_list import openjtalk_mora2text
+from . import full_context_label
+from .full_context_label import extract_full_context_label
+from .mora_list import openjtalk_mora2text
 
 
 def mora_to_text(mora: str) -> str:

From a20c82b8bb061de49134d37e25ebbb3567bd9acb Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sat, 16 Dec 2023 00:49:05 +0900
Subject: [PATCH 024/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E3=83=93?=
 =?UTF-8?q?=E3=83=AB=E3=83=89=E3=83=84=E3=83=BC=E3=83=AB=E3=81=AE=E7=A7=BB?=
 =?UTF-8?q?=E5=8B=95=20(#874)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Dockerfile                                                  | 4 ++--
 build_util/create_venv_and_generate_licenses.bash           | 2 +-
 generate_licenses.py => build_util/generate_licenses.py     | 0
 get_cost_candidates.py => build_util/get_cost_candidates.py | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)
 rename generate_licenses.py => build_util/generate_licenses.py (100%)
 rename get_cost_candidates.py => build_util/get_cost_candidates.py (97%)

diff --git a/Dockerfile b/Dockerfile
index 545449a7c..dd056d78f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -227,7 +227,7 @@ COPY --from=download-onnxruntime-env /opt/onnxruntime /opt/onnxruntime
 # Add local files
 ADD ./voicevox_engine /opt/voicevox_engine/voicevox_engine
 ADD ./docs /opt/voicevox_engine/docs
-ADD ./run.py ./generate_licenses.py ./presets.yaml ./default.csv ./engine_manifest.json /opt/voicevox_engine/
+ADD ./run.py ./build_util/generate_licenses.py ./presets.yaml ./default.csv ./engine_manifest.json /opt/voicevox_engine/
 ADD ./speaker_info /opt/voicevox_engine/speaker_info
 ADD ./ui_template /opt/voicevox_engine/ui_template
 ADD ./engine_manifest_assets /opt/voicevox_engine/engine_manifest_assets
@@ -249,7 +249,7 @@ RUN <<EOF
     export PATH="/home/user/.local/bin:${PATH:-}"
 
     gosu user /opt/python/bin/pip3 install -r /tmp/requirements-license.txt
-    gosu user /opt/python/bin/python3 generate_licenses.py > /opt/voicevox_engine/engine_manifest_assets/dependency_licenses.json
+    gosu user /opt/python/bin/python3 build_util/generate_licenses.py > /opt/voicevox_engine/engine_manifest_assets/dependency_licenses.json
     cp /opt/voicevox_engine/engine_manifest_assets/dependency_licenses.json /opt/voicevox_engine/licenses.json
 EOF
 
diff --git a/build_util/create_venv_and_generate_licenses.bash b/build_util/create_venv_and_generate_licenses.bash
index d2c837dbf..71a5f61c9 100644
--- a/build_util/create_venv_and_generate_licenses.bash
+++ b/build_util/create_venv_and_generate_licenses.bash
@@ -17,7 +17,7 @@ else
 fi
 
 pip install -r requirements-license.txt
-python generate_licenses.py >$OUTPUT_LICENSE_JSON_PATH
+python build_util/generate_licenses.py >$OUTPUT_LICENSE_JSON_PATH
 
 deactivate
 
diff --git a/generate_licenses.py b/build_util/generate_licenses.py
similarity index 100%
rename from generate_licenses.py
rename to build_util/generate_licenses.py
diff --git a/get_cost_candidates.py b/build_util/get_cost_candidates.py
similarity index 97%
rename from get_cost_candidates.py
rename to build_util/get_cost_candidates.py
index 072c4b4d5..785a0c4df 100644
--- a/get_cost_candidates.py
+++ b/build_util/get_cost_candidates.py
@@ -3,7 +3,7 @@
 引数のnaist_jdic_pathには、open_jtalkのsrc/mecab-naist-jdic/naist-jdic.csvを指定してください。
 
 実行例:
-python get_cost_candidates.py --naist_jdic_path=/path/to/naist-jdic.csv \
+python build_util/get_cost_candidates.py --naist_jdic_path=/path/to/naist-jdic.csv \
     --pos=名詞 \
     --pos_detail_1=固有名詞 \
     --pos_detail_2=一般 \

From ec1f70e52b1df5628317311fced841f4e9d8877c Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sat, 16 Dec 2023 01:05:09 +0900
Subject: [PATCH 025/177] =?UTF-8?q?=E5=BB=83=E6=AD=A2:=20`MetasStore`=20?=
 =?UTF-8?q?=E3=81=AE=E4=B8=8D=E4=BD=BF=E7=94=A8=E9=96=A2=E6=95=B0=20(#875)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 voicevox_engine/metas/MetasStore.py | 52 ++++-------------------------
 1 file changed, 7 insertions(+), 45 deletions(-)

diff --git a/voicevox_engine/metas/MetasStore.py b/voicevox_engine/metas/MetasStore.py
index 497b2723e..78f838a2a 100644
--- a/voicevox_engine/metas/MetasStore.py
+++ b/voicevox_engine/metas/MetasStore.py
@@ -20,7 +20,6 @@ def __init__(self, engine_speakers_path: Path) -> None:
         engine_speakers_path : Path
             エンジンに含まれる話者メタ情報ディレクトリのパス。
         """
-        self._engine_speakers_path = engine_speakers_path
         # エンジンに含まれる各話者のメタ情報
         self._loaded_metas: Dict[str, EngineSpeaker] = {
             folder.name: EngineSpeaker(
@@ -29,41 +28,6 @@ def __init__(self, engine_speakers_path: Path) -> None:
             for folder in engine_speakers_path.iterdir()
         }
 
-    def speaker_engine_metas(self, speaker_uuid: str) -> EngineSpeaker:
-        """
-        エンジンに含まれる指定話者のメタ情報を取得
-        Parameters
-        ----------
-        speaker_uuid : str
-            話者UUID
-        Returns
-        -------
-        ret : EngineSpeaker
-            エンジンに含まれる指定話者のメタ情報
-        """
-        return self.loaded_metas[speaker_uuid]
-
-    def combine_metas(self, core_metas: List[CoreSpeaker]) -> List[Speaker]:
-        """
-        コアに含まれる話者メタ情報に、エンジンに含まれる話者メタ情報を統合して返す
-        Parameters
-        ----------
-        core_metas : List[CoreSpeaker]
-            コアに含まれる話者メタ情報
-        Returns
-        -------
-        ret : List[Speaker]
-            エンジンとコアに含まれる話者メタ情報
-        """
-        # 話者単位でエンジン・コアに含まれるメタ情報を統合
-        return [
-            Speaker(
-                **self.speaker_engine_metas(speaker_meta.speaker_uuid).dict(),
-                **speaker_meta.dict(),
-            )
-            for speaker_meta in core_metas
-        ]
-
     # FIXME: engineではなくList[CoreSpeaker]を渡す形にすることで
     # SynthesisEngineBaseによる循環importを修正する
     def load_combined_metas(self, engine: "SynthesisEngineBase") -> List[Speaker]:
@@ -81,15 +45,13 @@ def load_combined_metas(self, engine: "SynthesisEngineBase") -> List[Speaker]:
         # コアに含まれる話者メタ情報の収集
         core_metas = [CoreSpeaker(**speaker) for speaker in json.loads(engine.speakers)]
         # エンジンに含まれる話者メタ情報との統合
-        return self.combine_metas(core_metas)
-
-    @property
-    def engine_speakers_path(self) -> Path:
-        return self._engine_speakers_path
-
-    @property
-    def loaded_metas(self) -> Dict[str, EngineSpeaker]:
-        return self._loaded_metas
+        return [
+            Speaker(
+                **self._loaded_metas[speaker_meta.speaker_uuid].dict(),
+                **speaker_meta.dict(),
+            )
+            for speaker_meta in core_metas
+        ]
 
 
 def construct_lookup(speakers: List[Speaker]) -> Dict[int, Tuple[Speaker, StyleInfo]]:

From f8750a1beac6c51ab7db6541d7978c92a7d02a0d Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Sat, 16 Dec 2023 01:19:23 +0900
Subject: [PATCH 026/177] =?UTF-8?q?hotfix/build=5Futil=E3=83=87=E3=82=A3?=
 =?UTF-8?q?=E3=83=AC=E3=82=AF=E3=83=88=E3=83=AA=E5=86=85=E3=81=AE=E3=82=B9?=
 =?UTF-8?q?=E3=82=AF=E3=83=AA=E3=83=97=E3=83=88=E3=81=AEDockerfile?=
 =?UTF-8?q?=E5=86=85=E3=81=A7=E3=81=AE=E3=82=B3=E3=83=94=E3=83=BC=E3=82=92?=
 =?UTF-8?q?=E6=AD=A3=E3=81=97=E3=81=84=E5=BD=A2=E3=81=AB=20(#878)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Dockerfile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index dd056d78f..ef8ca0727 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -227,7 +227,8 @@ COPY --from=download-onnxruntime-env /opt/onnxruntime /opt/onnxruntime
 # Add local files
 ADD ./voicevox_engine /opt/voicevox_engine/voicevox_engine
 ADD ./docs /opt/voicevox_engine/docs
-ADD ./run.py ./build_util/generate_licenses.py ./presets.yaml ./default.csv ./engine_manifest.json /opt/voicevox_engine/
+ADD ./run.py ./presets.yaml ./default.csv ./engine_manifest.json /opt/voicevox_engine/
+ADD ./build_util/generate_licenses.py /opt/voicevox_engine/build_util
 ADD ./speaker_info /opt/voicevox_engine/speaker_info
 ADD ./ui_template /opt/voicevox_engine/ui_template
 ADD ./engine_manifest_assets /opt/voicevox_engine/engine_manifest_assets

From 6a2a010c400edae6e9a34f787edf8e0d9847ab4e Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Sat, 16 Dec 2023 01:30:52 +0900
Subject: [PATCH 027/177] =?UTF-8?q?[hotfix]=20Dockerfile=E3=81=AE=E3=83=87?=
 =?UTF-8?q?=E3=82=A3=E3=83=AC=E3=82=AF=E3=83=88=E3=83=AA=E4=BB=A5=E4=B8=8B?=
 =?UTF-8?q?=E3=81=B8=E3=81=AE=E3=82=B3=E3=83=94=E3=83=BC=E3=81=AE=E3=82=B9?=
 =?UTF-8?q?=E3=83=A9=E3=83=83=E3=82=B7=E3=83=A5=E5=BF=98=E3=82=8C=20(#879)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index ef8ca0727..225b84bf9 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -228,7 +228,7 @@ COPY --from=download-onnxruntime-env /opt/onnxruntime /opt/onnxruntime
 ADD ./voicevox_engine /opt/voicevox_engine/voicevox_engine
 ADD ./docs /opt/voicevox_engine/docs
 ADD ./run.py ./presets.yaml ./default.csv ./engine_manifest.json /opt/voicevox_engine/
-ADD ./build_util/generate_licenses.py /opt/voicevox_engine/build_util
+ADD ./build_util/generate_licenses.py /opt/voicevox_engine/build_util/
 ADD ./speaker_info /opt/voicevox_engine/speaker_info
 ADD ./ui_template /opt/voicevox_engine/ui_template
 ADD ./engine_manifest_assets /opt/voicevox_engine/engine_manifest_assets

From 6c80586851fdecc52ef9f65ae0594f833296fe1b Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sat, 16 Dec 2023 20:50:52 +0900
Subject: [PATCH 028/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E3=83=95?=
 =?UTF-8?q?=E3=83=AB=E3=82=B3=E3=83=B3=E3=83=86=E3=82=AD=E3=82=B9=E3=83=88?=
 =?UTF-8?q?=E3=83=A9=E3=83=99=E3=83=AB=E9=96=A2=E9=80=A3=E3=82=B3=E3=83=A1?=
 =?UTF-8?q?=E3=83=B3=E3=83=88/docstring/=E5=9E=8B=E3=83=92=E3=83=B3?=
 =?UTF-8?q?=E3=83=88=20(#880)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp>
---
 .../tts_pipeline/full_context_label.py        | 160 +++++++++---------
 1 file changed, 84 insertions(+), 76 deletions(-)

diff --git a/voicevox_engine/tts_pipeline/full_context_label.py b/voicevox_engine/tts_pipeline/full_context_label.py
index 5ca599276..1e61a17b0 100644
--- a/voicevox_engine/tts_pipeline/full_context_label.py
+++ b/voicevox_engine/tts_pipeline/full_context_label.py
@@ -1,7 +1,7 @@
 import re
 from dataclasses import dataclass
 from itertools import chain
-from typing import Dict, List, Optional
+from typing import Self
 
 import pyopenjtalk
 
@@ -14,11 +14,11 @@ class Phoneme:
 
     Attributes
     ----------
-    contexts: Dict[str, str]
+    contexts: dict[str, str]
         音素の元
     """
 
-    contexts: Dict[str, str]
+    contexts: dict[str, str]
 
     @classmethod
     def from_label(cls, label: str):
@@ -81,11 +81,11 @@ def label(self):
     @property
     def phoneme(self):
         """
-        音素クラスの中で、発声に必要な要素を返す
+        音素クラスの中で、発声に必要なcontextを返す
         Returns
         -------
         phoneme : str
-            発声に必要な要素を返す
+            発声に必要なcontextを返す
         """
         return self.contexts["p3"]
 
@@ -111,13 +111,13 @@ class Mora:
 
     Attributes
     ----------
-    consonant : Optional[Phoneme]
+    consonant : Phoneme | None
         子音
     vowel : Phoneme
         母音
     """
 
-    consonant: Optional[Phoneme]
+    consonant: Phoneme | None
     vowel: Phoneme
 
     def set_context(self, key: str, value: str):
@@ -141,7 +141,7 @@ def phonemes(self):
         音素群を返す
         Returns
         -------
-        phonemes : List[Phoneme]
+        phonemes : list[Phoneme]
             母音しかない場合は母音のみ、子音もある場合は子音、母音の順番でPhonemeのリストを返す
         """
         if self.consonant is not None:
@@ -155,7 +155,7 @@ def labels(self):
         ラベル群を返す
         Returns
         -------
-        labels : List[str]
+        labels : list[str]
             Moraに含まれるすべてのラベルを返す
         """
         return [p.label for p in self.phonemes]
@@ -168,62 +168,69 @@ class AccentPhrase:
     同じアクセントのMoraを複数保持する
     Attributes
     ----------
-    moras : List[Mora]
+    moras : list[Mora]
         音韻のリスト
     accent : int
         アクセント
     """
 
-    moras: List[Mora]
+    moras: list[Mora]
     accent: int
     is_interrogative: bool
 
     @classmethod
-    def from_phonemes(cls, phonemes: List[Phoneme]):
-        """
-        PhonemeのリストからAccentPhraseクラスを作成する
-        Parameters
-        ----------
-        phonemes : List[Phoneme]
-            phonemeのリストを渡す
+    def from_phonemes(cls, phonemes: list[Phoneme]) -> Self:
+        """音素系列をcontextで区切りAccentPhraseインスタンスを生成する"""
 
-        Returns
-        -------
-        accent_phrase : AccentPhrase
-            AccentPhraseクラスを返す
-        """
-        moras: List[Mora] = []
+        # NOTE:「モーラごとの音素系列」は音素系列をcontextで区切り生成される。
+
+        moras: list[Mora] = []  # モーラ系列
+        mora_phonemes: list[Phoneme] = []  # モーラごとの音素系列を一時保存するコンテナ
 
-        mora_phonemes: List[Phoneme] = []
         for phoneme, next_phoneme in zip(phonemes, phonemes[1:] + [None]):
-            # workaround for Hihosiba/voicevox_engine#57
-            # (py)openjtalk によるアクセント句内のモーラへの附番は 49 番目まで
-            # 49 番目のモーラについて、続く音素のモーラ番号を単一モーラの特定に使えない
+            # モーラ抽出を打ち切る（ワークアラウンド、VOICEVOX/voicevox_engine#57）
+            # context a2（モーラ番号）の最大値が 49 であるため、49番目以降のモーラでは音素のモーラ番号を区切りに使えない
             if int(phoneme.contexts["a2"]) == 49:
                 break
 
+            # 区切りまで音素系列を一時保存する
             mora_phonemes.append(phoneme)
 
+            # 一時的な音素系列を確定させて処理する
+            # a2はアクセント句内でのモーラ番号(1~49)
             if (
                 next_phoneme is None
                 or phoneme.contexts["a2"] != next_phoneme.contexts["a2"]
             ):
+                # モーラごとの音素系列長に基づいて子音と母音を得る
                 if len(mora_phonemes) == 1:
                     consonant, vowel = None, mora_phonemes[0]
                 elif len(mora_phonemes) == 2:
                     consonant, vowel = mora_phonemes[0], mora_phonemes[1]
                 else:
                     raise ValueError(mora_phonemes)
+                # 子音と母音からモーラを生成して保存する
                 mora = Mora(consonant=consonant, vowel=vowel)
                 moras.append(mora)
+                # 次に向けてリセット
                 mora_phonemes = []
 
+        # アクセント位置を決定する
+        # f2はアクセント句のアクセント位置(1~49)
         accent = int(moras[0].vowel.contexts["f2"])
-        # workaround for Hihosiba/voicevox_engine#55
-        # アクセント位置とするキー f2 の値がアクセント句内のモーラ数を超える場合がある
+        # f2 の値がアクセント句内のモーラ数を超える場合はクリップ（ワークアラウンド、VOICEVOX/voicevox_engine#55 を参照）
         accent = accent if accent <= len(moras) else len(moras)
+
+        # 疑問文か否か判定する（末尾モーラ母音のcontextに基づく）
+        # f3はアクセント句が疑問文かどうか（1で疑問文）
         is_interrogative = moras[-1].vowel.contexts["f3"] == "1"
-        return cls(moras=moras, accent=accent, is_interrogative=is_interrogative)
+
+        # AccentPhrase インスタンスを生成する
+        accent_phrase = cls(
+            moras=moras, accent=accent, is_interrogative=is_interrogative
+        )
+
+        return accent_phrase
 
     def set_context(self, key: str, value: str):
         """
@@ -244,7 +251,7 @@ def phonemes(self):
         音素群を返す
         Returns
         -------
-        phonemes : List[Phoneme]
+        phonemes : list[Phoneme]
             AccentPhraseに間接的に含まれる全てのPhonemeを返す
         """
         return list(chain.from_iterable(m.phonemes for m in self.moras))
@@ -255,7 +262,7 @@ def labels(self):
         ラベル群を返す
         Returns
         -------
-        labels : List[str]
+        labels : list[str]
             AccentPhraseに間接的に含まれる全てのラベルを返す
         """
         return [p.label for p in self.phonemes]
@@ -288,41 +295,43 @@ class BreathGroup:
     アクセントの異なるアクセント句を複数保持する
     Attributes
     ----------
-    accent_phrases : List[AccentPhrase]
+    accent_phrases : list[AccentPhrase]
         アクセント句のリスト
     """
 
-    accent_phrases: List[AccentPhrase]
+    accent_phrases: list[AccentPhrase]
 
     @classmethod
-    def from_phonemes(cls, phonemes: List[Phoneme]):
-        """
-        PhonemeのリストからBreathGroupクラスを作成する
-        Parameters
-        ----------
-        phonemes : List[Phoneme]
-            phonemeのリストを渡す
+    def from_phonemes(cls, phonemes: list[Phoneme]) -> Self:
+        """音素系列をcontextで区切りBreathGroupインスタンスを生成する"""
+
+        # NOTE:「アクセント句ごとの音素系列」は音素系列をcontextで区切り生成される。
+
+        accent_phrases: list[AccentPhrase] = []  # アクセント句系列
+        accent_phonemes: list[Phoneme] = []  # アクセント句ごとの音素系列を一時保存するコンテナ
 
-        Returns
-        -------
-        breath_group : BreathGroup
-            BreathGroupクラスを返す
-        """
-        accent_phrases: List[AccentPhrase] = []
-        accent_phonemes: List[Phoneme] = []
         for phoneme, next_phoneme in zip(phonemes, phonemes[1:] + [None]):
+            # 区切りまで音素系列を一時保存する
             accent_phonemes.append(phoneme)
 
+            # 一時的な音素系列を確定させて処理する
+            # i3はBreathGroupの番号
+            # f5はBreathGroup内でのアクセント句の番号
             if (
                 next_phoneme is None
                 or phoneme.contexts["i3"] != next_phoneme.contexts["i3"]
                 or phoneme.contexts["f5"] != next_phoneme.contexts["f5"]
             ):
+                # アクセント句を生成して保存する
                 accent_phrase = AccentPhrase.from_phonemes(accent_phonemes)
                 accent_phrases.append(accent_phrase)
+                # 次に向けてリセット
                 accent_phonemes = []
 
-        return cls(accent_phrases=accent_phrases)
+        # BreathGroup インスタンスを生成する
+        breath_group = cls(accent_phrases=accent_phrases)
+
+        return breath_group
 
     def set_context(self, key: str, value: str):
         """
@@ -343,7 +352,7 @@ def phonemes(self):
         音素群を返す
         Returns
         -------
-        phonemes : List[Phoneme]
+        phonemes : list[Phoneme]
             BreathGroupに間接的に含まれる全てのPhonemeを返す
         """
         return list(
@@ -358,7 +367,7 @@ def labels(self):
         ラベル群を返す
         Returns
         -------
-        labels : List[str]
+        labels : list[str]
             BreathGroupに間接的に含まれる全てのラベルを返す
         """
         return [p.label for p in self.phonemes]
@@ -371,46 +380,45 @@ class Utterance:
     発声の区切りと無音を複数保持する
     Attributes
     ----------
-    breath_groups : List[BreathGroup]
+    breath_groups : list[BreathGroup]
         発声の区切りのリスト
-    pauses : List[Phoneme]
+    pauses : list[Phoneme]
         無音のリスト
     """
 
-    breath_groups: List[BreathGroup]
-    pauses: List[Phoneme]
+    breath_groups: list[BreathGroup]
+    pauses: list[Phoneme]
 
     @classmethod
-    def from_phonemes(cls, phonemes: List[Phoneme]):
-        """
-        Phonemeの完全なリストからUtteranceクラスを作成する
-        Parameters
-        ----------
-        phonemes : List[Phoneme]
-            phonemeのリストを渡す
+    def from_phonemes(cls, phonemes: list[Phoneme]) -> Self:
+        """音素系列をポーズで区切りUtteranceインスタンスを生成する"""
 
-        Returns
-        -------
-        utterance : Utterance
-            Utteranceクラスを返す
-        """
-        pauses: List[Phoneme] = []
+        # NOTE:「BreathGroupごとの音素系列」は音素系列をポーズで区切り生成される。
+
+        pauses: list[Phoneme] = []  # ポーズ音素のリスト
+        breath_groups: list[BreathGroup] = []  # BreathGroup のリスト
+        group_phonemes: list[Phoneme] = []  # BreathGroupごとの音素系列を一時保存するコンテナ
 
-        breath_groups: List[BreathGroup] = []
-        group_phonemes: List[Phoneme] = []
         for phoneme in phonemes:
+            # ポーズが出現するまで音素系列を一時保存する
             if not phoneme.is_pause():
                 group_phonemes.append(phoneme)
 
+            # 一時的な音素系列を確定させて処理する
             else:
+                # ポーズ音素を保存する
                 pauses.append(phoneme)
-
                 if len(group_phonemes) > 0:
+                    # 音素系列からBreathGroupを生成して保存する
                     breath_group = BreathGroup.from_phonemes(group_phonemes)
                     breath_groups.append(breath_group)
+                    # 次に向けてリセット
                     group_phonemes = []
 
-        return cls(breath_groups=breath_groups, pauses=pauses)
+        # Utteranceインスタンスを生成する
+        utterance = cls(breath_groups=breath_groups, pauses=pauses)
+
+        return utterance
 
     def set_context(self, key: str, value: str):
         """
@@ -431,7 +439,7 @@ def phonemes(self):
         音素群を返す
         Returns
         -------
-        phonemes : List[Phoneme]
+        phonemes : list[Phoneme]
             Utteranceクラスに直接的・間接的に含まれる、全てのPhonemeを返す
         """
         accent_phrases = list(
@@ -496,7 +504,7 @@ def phonemes(self):
             ),
         )
 
-        phonemes: List[Phoneme] = []
+        phonemes: list[Phoneme] = []
         for i in range(len(self.pauses)):
             if self.pauses[i] is not None:
                 phonemes += [self.pauses[i]]
@@ -512,7 +520,7 @@ def labels(self):
         ラベル群を返す
         Returns
         -------
-        labels : List[str]
+        labels : list[str]
             Utteranceクラスに直接的・間接的に含まれる全てのラベルを返す
         """
         return [p.label for p in self.phonemes]

From b8e1831d1eabb178b0eba8c039e8b97aedf3279b Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sat, 16 Dec 2023 21:37:33 +0900
Subject: [PATCH 029/177] =?UTF-8?q?=E8=BF=BD=E5=8A=A0:=20=E7=B5=B1?=
 =?UTF-8?q?=E4=B8=80=E3=83=89=E3=83=A1=E3=82=A4=E3=83=B3=E7=94=A8=E8=AA=9E?=
 =?UTF-8?q?=20`=E9=9F=B3=E5=A3=B0=E5=90=88=E6=88=90=E7=94=A8=E3=81=AE?=
 =?UTF-8?q?=E3=82=AF=E3=82=A8=E3=83=AA`=20(#863)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md                                      |  4 ++--
 run.py                                         |  4 ++--
 voicevox_engine/dev/synthesis_engine/mock.py   |  2 +-
 voicevox_engine/model.py                       |  2 +-
 voicevox_engine/tts_pipeline/tts_engine.py     | 18 +++++++++---------
 .../tts_pipeline/tts_engine_base.py            |  8 ++++----
 6 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index 20267570c..3e0eb1931 100644
--- a/README.md
+++ b/README.md
@@ -217,7 +217,7 @@ curl -s -X GET "127.0.0.1:50021/presets" > presets.json
 preset_id=$(cat presets.json | sed -r 's/^.+"id"\:\s?([0-9]+?).+$/\1/g')
 style_id=$(cat presets.json | sed -r 's/^.+"style_id"\:\s?([0-9]+?).+$/\1/g')
 
-# AudioQueryの取得
+# 音声合成用のクエリを取得
 curl -s \
     -X POST \
     "127.0.0.1:50021/audio_query_from_preset?preset_id=$preset_id"\
@@ -589,7 +589,7 @@ VOICEVOX ENGINE リポジトリを fork し、一部の機能を改造するの
 ダミーのアイコンなどが用意されているので適宜変更してください。
 
 音声合成は`voicevox_engine/synthesis_engine/synthesis_engine.py`で行われています。
-VOICEVOX API での音声合成は、エンジン側で音声合成クエリ`AudioQuery`の初期値を作成してユーザーに返し、ユーザーが必要に応じてクエリを編集したあと、エンジンがクエリに従って音声合成することで実現しています。
+VOICEVOX API での音声合成は、エンジン側で音声合成用のクエリ `AudioQuery` の初期値を作成してユーザーに返し、ユーザーが必要に応じてクエリを編集したあと、エンジンがクエリに従って音声合成することで実現しています。
 クエリ作成は`/audio_query`エンドポイントで、音声合成は`/synthesis`エンドポイントで行っており、最低この２つに対応すれば VOICEVOX API に準拠したことになります。
 
 #### マルチエンジン機能対応エンジンの配布方法
diff --git a/run.py b/run.py
index ec38fd41d..aa3c3afec 100644
--- a/run.py
+++ b/run.py
@@ -246,7 +246,7 @@ def audio_query(
         core_version: str | None = None,
     ) -> AudioQuery:
         """
-        クエリの初期値を得ます。ここで得られたクエリはそのまま音声合成に利用できます。各値の意味は`Schemas`を参照してください。
+        音声合成用のクエリの初期値を得ます。ここで得られたクエリはそのまま音声合成に利用できます。各値の意味は`Schemas`を参照してください。
         """
         style_id = get_style_id_from_deprecated(style_id=style_id, speaker_id=speaker)
         engine = get_engine(core_version)
@@ -276,7 +276,7 @@ def audio_query_from_preset(
         core_version: str | None = None,
     ) -> AudioQuery:
         """
-        クエリの初期値を得ます。ここで得られたクエリはそのまま音声合成に利用できます。各値の意味は`Schemas`を参照してください。
+        音声合成用のクエリの初期値を得ます。ここで得られたクエリはそのまま音声合成に利用できます。各値の意味は`Schemas`を参照してください。
         """
         engine = get_engine(core_version)
         try:
diff --git a/voicevox_engine/dev/synthesis_engine/mock.py b/voicevox_engine/dev/synthesis_engine/mock.py
index ec366b31b..3cb72dc79 100644
--- a/voicevox_engine/dev/synthesis_engine/mock.py
+++ b/voicevox_engine/dev/synthesis_engine/mock.py
@@ -87,7 +87,7 @@ def _synthesis_impl(self, query: AudioQuery, style_id: int) -> np.ndarray:
         Parameters
         ----------
         query : AudioQuery
-            /audio_query APIで得たjson
+            音声合成用のクエリ
         style_id : int
             スタイルID
 
diff --git a/voicevox_engine/model.py b/voicevox_engine/model.py
index 1c02aa168..c4d19ba69 100644
--- a/voicevox_engine/model.py
+++ b/voicevox_engine/model.py
@@ -59,7 +59,7 @@ class AudioQuery(BaseModel):
     postPhonemeLength: float = Field(title="音声の後の無音時間")
     outputSamplingRate: int = Field(title="音声データの出力サンプリングレート")
     outputStereo: bool = Field(title="音声データをステレオ出力するか否か")
-    kana: Optional[str] = Field(title="[読み取り専用]AquesTalk風記法によるテキスト。音声合成クエリとしては無視される")
+    kana: Optional[str] = Field(title="[読み取り専用]AquesTalk風記法によるテキスト。音声合成用のクエリとしては無視される")
 
     def __hash__(self):
         items = [
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index 372900c6f..c05c122dd 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -130,7 +130,7 @@ def apply_prepost_silence(moras: list[Mora], query: AudioQuery) -> list[Mora]:
     moras : List[Mora]
         モーラ時系列
     query : AudioQuery
-        音声合成クエリ
+        音声合成用のクエリ
     Returns
     -------
     moras : List[Mora]
@@ -150,7 +150,7 @@ def apply_speed_scale(moras: list[Mora], query: AudioQuery) -> list[Mora]:
     moras : list[Mora]
         モーラ系列
     query : AudioQuery
-        音声合成クエリ
+        音声合成用のクエリ
     Returns
     -------
     moras : list[Mora]
@@ -216,7 +216,7 @@ def apply_pitch_scale(moras: list[Mora], query: AudioQuery) -> list[Mora]:
     moras : list[Mora]
         モーラ系列
     query : AudioQuery
-        音声合成クエリ
+        音声合成用のクエリ
     Returns
     -------
     moras : list[Mora]
@@ -235,7 +235,7 @@ def apply_intonation_scale(moras: list[Mora], query: AudioQuery) -> list[Mora]:
     moras : list[Mora]
         モーラ系列
     query : AudioQuery
-        音声合成クエリ
+        音声合成用のクエリ
     Returns
     -------
     moras : list[Mora]
@@ -281,7 +281,7 @@ def apply_volume_scale(wave: numpy.ndarray, query: AudioQuery) -> numpy.ndarray:
     wave : numpy.ndarray
         音声波形
     query : AudioQuery
-        音声合成クエリ
+        音声合成用のクエリ
     Returns
     -------
     wave : numpy.ndarray
@@ -326,7 +326,7 @@ def apply_output_sampling_rate(
     sr_wave : int
         `wave`のサンプリングレート
     query : AudioQuery
-        音声合成クエリ
+        音声合成用のクエリ
     Returns
     -------
     wave : ndarray
@@ -348,7 +348,7 @@ def apply_output_stereo(wave: ndarray, query: AudioQuery) -> ndarray:
     wave : ndarray
         音声波形
     query : AudioQuery
-        音声合成クエリ
+        音声合成用のクエリ
     Returns
     -------
     wave : ndarray
@@ -599,11 +599,11 @@ def _create_one_hot(accent_phrase: AccentPhrase, position: int):
 
     def _synthesis_impl(self, query: AudioQuery, style_id: int):
         """
-        音声合成クエリから音声合成に必要な情報を構成し、実際に音声合成を行う
+        音声合成用のクエリから音声合成に必要な情報を構成し、実際に音声合成を行う
         Parameters
         ----------
         query : AudioQuery
-            音声合成クエリ
+            音声合成用のクエリ
         style_id : int
             スタイルID
         Returns
diff --git a/voicevox_engine/tts_pipeline/tts_engine_base.py b/voicevox_engine/tts_pipeline/tts_engine_base.py
index 502580f8e..f4eeda039 100644
--- a/voicevox_engine/tts_pipeline/tts_engine_base.py
+++ b/voicevox_engine/tts_pipeline/tts_engine_base.py
@@ -307,12 +307,12 @@ def synthesis(
         enable_interrogative_upspeak: bool = True,
     ) -> np.ndarray:
         """
-        音声合成クエリ内の疑問文指定されたMoraを変形した後、
+        音声合成用のクエリ内の疑問文指定されたMoraを変形した後、
         継承先における実装`_synthesis_impl`を使い音声合成を行う
         Parameters
         ----------
         query : AudioQuery
-            音声合成クエリ
+            音声合成用のクエリ
         style_id : int
             スタイルID
         enable_interrogative_upspeak : bool
@@ -337,11 +337,11 @@ def _synthesis_impl(
         style_id: int,
     ) -> np.ndarray:
         """
-        音声合成クエリから音声合成に必要な情報を構成し、実際に音声合成を行う
+        音声合成用のクエリから音声合成に必要な情報を構成し、実際に音声合成を行う
         Parameters
         ----------
         query : AudioQuery
-            音声合成クエリ
+            音声合成用のクエリ
         style_id : int
             スタイルID
         Returns

From 58a993d3d5ef299f92c731c61113809cd618808f Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sun, 17 Dec 2023 08:27:15 +0900
Subject: [PATCH 030/177] =?UTF-8?q?=E8=BF=BD=E5=8A=A0:=20=E9=9F=B3?=
 =?UTF-8?q?=E5=A3=B0=E3=83=A9=E3=82=A4=E3=83=96=E3=83=A9=E3=83=AA=E8=87=AA?=
 =?UTF-8?q?=E5=8B=95=E8=AA=AD=E3=81=BF=E8=BE=BC=E3=81=BF=20docs=20(#869)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 README.md | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/README.md b/README.md
index 3e0eb1931..589da6944 100644
--- a/README.md
+++ b/README.md
@@ -470,6 +470,19 @@ Mac では、`--runtime_dir`引数の代わりに`DYLD_LIBRARY_PATH`の指定が
 DYLD_LIBRARY_PATH="/path/to/onnx" python run.py --voicelib_dir="/path/to/voicevox_core"
 ```
 
+##### ユーザーディレクトリに配置する
+
+以下のディレクトリにある音声ライブラリは自動で読み込まれます。
+
+- ビルド版: `<user_data_dir>/voicevox-engine/core_libraries/`
+- Python 版: `<user_data_dir>/voicevox-engine-dev/core_libraries/`
+
+`<user_data_dir>`は OS によって異なります。
+
+- Windows: `C:\Users\<username>\AppData\Local\`
+- macOS: `/Users/<username>/Library/Application\ Support/`
+- Linux: `/home/<username>/.local/share/`
+
 ### ビルド
 
 この方法でビルドしたものは、リリースで公開されているものとは異なります。

From 3cb454f1323d4c2ed0ff3252707b0c3219b1fe83 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sun, 17 Dec 2023 08:41:03 +0900
Subject: [PATCH 031/177] =?UTF-8?q?Refactor:=20=E3=83=86=E3=82=B9=E3=83=88?=
 =?UTF-8?q?=E4=B8=8D=E4=BD=BF=E7=94=A8=E5=A4=89=E6=95=B0=E5=89=8A=E9=99=A4?=
 =?UTF-8?q?=E3=81=A8utility=E3=81=AB=E3=82=88=E3=82=8B=E7=B0=A1=E7=95=A5?=
 =?UTF-8?q?=E5=8C=96=20(#882)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 test/test_synthesis_engine.py | 210 +++++-----------------------------
 1 file changed, 27 insertions(+), 183 deletions(-)

diff --git a/test/test_synthesis_engine.py b/test/test_synthesis_engine.py
index 00730bb5d..84064bdea 100644
--- a/test/test_synthesis_engine.py
+++ b/test/test_synthesis_engine.py
@@ -481,7 +481,6 @@ def test_feat_to_framescale():
     #                        Pre k  o  N pau h  i  h  O Pst
     true_frame_per_phoneme = [1, 1, 2, 2, 1, 1, 2, 2, 1, 3]
     n_frame = sum(true_frame_per_phoneme)
-    true_frame_per_phoneme = numpy.array(true_frame_per_phoneme, dtype=numpy.int32)
     # phoneme
     #                     Pr  k   o   o  N  N pau  h   i   i   h   h  O Pt Pt Pt
     frame_phoneme_idxs = [0, 23, 30, 30, 4, 4, 0, 19, 21, 21, 19, 19, 5, 0, 0, 0]
@@ -489,9 +488,6 @@ def test_feat_to_framescale():
     for frame_idx, phoneme_idx in enumerate(frame_phoneme_idxs):
         true_frame_phoneme[frame_idx, phoneme_idx] = 1.0
     # Pitch
-    #          Pre   ko      N    pau   hi    hO   Pst
-    true_f0 = [0.0, 200.0, 200.0, 0.0, 500.0, 0.0, 0.0]  # mean 300
-    true_f0 = [0.0, 250.0, 250.0, 0.0, 400.0, 0.0, 0.0]  # intonationScale 0.5
     #                   paw ko  N pau hi hO paw
     # frame_per_vowel = [1, 3,  2, 1, 3, 3, 3]
     #           pau   ko     ko     ko      N      N
@@ -532,91 +528,21 @@ def setUp(self):
         self.accent_phrases_hello_hiho = [
             AccentPhrase(
                 moras=[
-                    Mora(
-                        text="コ",
-                        consonant="k",
-                        consonant_length=0.0,
-                        vowel="o",
-                        vowel_length=0.0,
-                        pitch=0.0,
-                    ),
-                    Mora(
-                        text="ン",
-                        consonant=None,
-                        consonant_length=None,
-                        vowel="N",
-                        vowel_length=0.0,
-                        pitch=0.0,
-                    ),
-                    Mora(
-                        text="ニ",
-                        consonant="n",
-                        consonant_length=0.0,
-                        vowel="i",
-                        vowel_length=0.0,
-                        pitch=0.0,
-                    ),
-                    Mora(
-                        text="チ",
-                        consonant="ch",
-                        consonant_length=0.0,
-                        vowel="i",
-                        vowel_length=0.0,
-                        pitch=0.0,
-                    ),
-                    Mora(
-                        text="ワ",
-                        consonant="w",
-                        consonant_length=0.0,
-                        vowel="a",
-                        vowel_length=0.0,
-                        pitch=0.0,
-                    ),
+                    _gen_mora("コ", "k", 0.0, "o", 0.0, 0.0),
+                    _gen_mora("ン", None, None, "N", 0.0, 0.0),
+                    _gen_mora("ニ", "n", 0.0, "i", 0.0, 0.0),
+                    _gen_mora("チ", "ch", 0.0, "i", 0.0, 0.0),
+                    _gen_mora("ワ", "w", 0.0, "a", 0.0, 0.0),
                 ],
                 accent=5,
-                pause_mora=Mora(
-                    text="、",
-                    consonant=None,
-                    consonant_length=None,
-                    vowel="pau",
-                    vowel_length=0.0,
-                    pitch=0.0,
-                ),
+                pause_mora=_gen_mora("、", None, None, "pau", 0.0, 0.0),
             ),
             AccentPhrase(
                 moras=[
-                    Mora(
-                        text="ヒ",
-                        consonant="h",
-                        consonant_length=0.0,
-                        vowel="i",
-                        vowel_length=0.0,
-                        pitch=0.0,
-                    ),
-                    Mora(
-                        text="ホ",
-                        consonant="h",
-                        consonant_length=0.0,
-                        vowel="o",
-                        vowel_length=0.0,
-                        pitch=0.0,
-                    ),
-                    Mora(
-                        text="デ",
-                        consonant="d",
-                        consonant_length=0.0,
-                        vowel="e",
-                        vowel_length=0.0,
-                        pitch=0.0,
-                    ),
-                    Mora(
-                        text="ス",
-                        consonant="s",
-                        consonant_length=0.0,
-                        vowel="U",
-                        vowel_length=0.0,
-                        pitch=0.0,
-                    ),
+                    _gen_mora("ヒ", "h", 0.0, "i", 0.0, 0.0),
+                    _gen_mora("ホ", "h", 0.0, "o", 0.0, 0.0),
+                    _gen_mora("デ", "d", 0.0, "e", 0.0, 0.0),
+                    _gen_mora("ス", "s", 0.0, "U", 0.0, 0.0),
                 ],
                 accent=1,
                 pause_mora=None,
@@ -626,9 +552,7 @@ def setUp(self):
         self.yukarin_s_mock = core.yukarin_s_forward
         self.yukarin_sa_mock = core.yukarin_sa_forward
         self.decode_mock = core.decode_forward
-        self.synthesis_engine = SynthesisEngine(
-            core=core,
-        )
+        self.synthesis_engine = SynthesisEngine(core=core)
 
     def test_to_flatten_moras(self):
         flatten_moras = to_flatten_moras(self.accent_phrases_hello_hiho)
@@ -640,30 +564,17 @@ def test_to_flatten_moras(self):
         )
 
     def test_split_mora(self):
+        # Outputs
         consonant_phoneme_list, vowel_phoneme_list, vowel_indexes = split_mora(
             self.phoneme_data_list_hello_hiho
         )
 
         self.assertEqual(vowel_indexes, [0, 2, 3, 5, 7, 9, 10, 12, 14, 16, 18, 19])
 
+        ps = ["pau", "o", "N", "i", "i", "a", "pau", "i", "o", "e", "U", "pau"]
+        true_vowel_phoneme_list = [OjtPhoneme(p) for p in ps]
         self.assertTrue(
-            is_same_ojt_phoneme_list(
-                vowel_phoneme_list,
-                [
-                    OjtPhoneme("pau"),
-                    OjtPhoneme("o"),
-                    OjtPhoneme("N"),
-                    OjtPhoneme("i"),
-                    OjtPhoneme("i"),
-                    OjtPhoneme("a"),
-                    OjtPhoneme("pau"),
-                    OjtPhoneme("i"),
-                    OjtPhoneme("o"),
-                    OjtPhoneme("e"),
-                    OjtPhoneme("U"),
-                    OjtPhoneme("pau"),
-                ],
-            )
+            is_same_ojt_phoneme_list(vowel_phoneme_list, true_vowel_phoneme_list)
         )
         self.assertTrue(
             is_same_ojt_phoneme_list(
@@ -702,15 +613,13 @@ def test_pre_process(self):
                 if mora.consonant is not None:
                     self.assertTrue(
                         is_same_phoneme(
-                            phoneme_data_list[phoneme_index],
-                            OjtPhoneme(mora.consonant),
+                            phoneme_data_list[phoneme_index], OjtPhoneme(mora.consonant)
                         )
                     )
                     phoneme_index += 1
                 self.assertTrue(
                     is_same_phoneme(
-                        phoneme_data_list[phoneme_index],
-                        OjtPhoneme(mora.vowel),
+                        phoneme_data_list[phoneme_index], OjtPhoneme(mora.vowel)
                     )
                 )
                 phoneme_index += 1
@@ -718,17 +627,11 @@ def test_pre_process(self):
                 self.assertEqual(flatten_moras[mora_index], accent_phrase.pause_mora)
                 mora_index += 1
                 self.assertTrue(
-                    is_same_phoneme(
-                        phoneme_data_list[phoneme_index],
-                        OjtPhoneme("pau"),
-                    )
+                    is_same_phoneme(phoneme_data_list[phoneme_index], OjtPhoneme("pau"))
                 )
                 phoneme_index += 1
         self.assertTrue(
-            is_same_phoneme(
-                phoneme_data_list[phoneme_index],
-                OjtPhoneme("pau"),
-            )
+            is_same_phoneme(phoneme_data_list[phoneme_index], OjtPhoneme("pau"))
         )
 
     def test_replace_phoneme_length(self):
@@ -742,33 +645,12 @@ def test_replace_phoneme_length(self):
         phoneme_list = yukarin_s_args["phoneme_list"]
         self.assertEqual(list_length, 20)
         self.assertEqual(list_length, len(phoneme_list))
+        true_phoneme_list_1 = [0, 23, 30, 4, 28, 21, 10, 21, 42, 7]
+        true_phoneme_list_2 = [0, 19, 21, 19, 30, 12, 14, 35, 6, 0]
+        true_phoneme_list = true_phoneme_list_1 + true_phoneme_list_2
         numpy.testing.assert_array_equal(
             phoneme_list,
-            numpy.array(
-                [
-                    0,
-                    23,
-                    30,
-                    4,
-                    28,
-                    21,
-                    10,
-                    21,
-                    42,
-                    7,
-                    0,
-                    19,
-                    21,
-                    19,
-                    30,
-                    12,
-                    14,
-                    35,
-                    6,
-                    0,
-                ],
-                dtype=numpy.int64,
-            ),
+            numpy.array(true_phoneme_list, dtype=numpy.int64),
         )
         self.assertEqual(yukarin_s_args["style_id"], 1)
 
@@ -827,41 +709,11 @@ def test_replace_mora_pitch(self):
 
         numpy.testing.assert_array_equal(
             vowel_phoneme_list,
-            numpy.array(
-                [
-                    0,
-                    30,
-                    4,
-                    21,
-                    21,
-                    7,
-                    0,
-                    21,
-                    30,
-                    14,
-                    6,
-                    0,
-                ]
-            ),
+            numpy.array([0, 30, 4, 21, 21, 7, 0, 21, 30, 14, 6, 0]),
         )
         numpy.testing.assert_array_equal(
             consonant_phoneme_list,
-            numpy.array(
-                [
-                    -1,
-                    23,
-                    -1,
-                    28,
-                    10,
-                    42,
-                    -1,
-                    19,
-                    19,
-                    12,
-                    35,
-                    -1,
-                ]
-            ),
+            numpy.array([-1, 23, -1, 28, 10, 42, -1, 19, 19, 12, 35, -1]),
         )
         numpy.testing.assert_array_equal(
             start_accent_list, numpy.array([0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0])
@@ -1050,18 +902,10 @@ def synthesis_test_base(self, audio_query: AudioQuery):
         self.assertTrue(assert_result_count >= int(len(true_result) / 5) * 4)
 
     def test_synthesis(self):
-        audio_query = AudioQuery(
-            accent_phrases=deepcopy(self.accent_phrases_hello_hiho),
-            speedScale=1.0,
-            pitchScale=1.0,
-            intonationScale=1.0,
-            volumeScale=1.0,
+        audio_query = _gen_query(
+            deepcopy(self.accent_phrases_hello_hiho),
             prePhonemeLength=0.1,
             postPhonemeLength=0.1,
-            outputSamplingRate=24000,
-            outputStereo=False,
-            # このテスト内では使わないので生成不要
-            kana="",
         )
 
         self.synthesis_test_base(audio_query)

From 11f080ea0168e6c73574be5f84b7b17d27ead69e Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sun, 17 Dec 2023 08:53:35 +0900
Subject: [PATCH 032/177] =?UTF-8?q?=E4=BF=AE=E6=AD=A3:=20=E8=BE=9E?=
 =?UTF-8?q?=E6=9B=B8=E6=9B=B4=E6=96=B0=E6=99=82=E3=81=AE=E3=83=95=E3=82=A1?=
 =?UTF-8?q?=E3=82=A4=E3=83=AB=E3=83=AA=E3=83=8D=E3=83=BC=E3=83=A0=E3=82=A8?=
 =?UTF-8?q?=E3=83=A9=E3=83=BC=20(#884)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 voicevox_engine/user_dict.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/voicevox_engine/user_dict.py b/voicevox_engine/user_dict.py
index f720ac4aa..abdf39f48 100644
--- a/voicevox_engine/user_dict.py
+++ b/voicevox_engine/user_dict.py
@@ -75,9 +75,11 @@ def update_dict(
         コンパイル済み辞書ファイルのパス
     """
     random_string = uuid4()
-    tmp_csv_path = save_dir / f".tmp.dict_csv-{random_string}"  # csv形式辞書データの一時保存ファイル
-    tmp_compiled_path = (
-        save_dir / f".tmp.dict_compiled-{random_string}"
+    tmp_csv_path = compiled_dict_path.with_suffix(
+        f".dict_csv-{random_string}.tmp"
+    )  # csv形式辞書データの一時保存ファイル
+    tmp_compiled_path = compiled_dict_path.with_suffix(
+        f".dict_compiled-{random_string}.tmp"
     )  # コンパイル済み辞書データの一時保存ファイル
 
     try:

From 3bd9199e6517e0bc0e91a6dc1cbff7831c4a9776 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sun, 17 Dec 2023 23:36:47 +0900
Subject: [PATCH 033/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20AquesTalk?=
 =?UTF-8?q?=E9=A2=A8=E8=A8=98=E6=B3=95=E3=83=91=E3=83=BC=E3=82=B9=E8=A6=8F?=
 =?UTF-8?q?=E5=89=87=E3=82=B3=E3=83=A1=E3=83=B3=E3=83=88=E3=81=AE=E8=BF=BD?=
 =?UTF-8?q?=E5=8A=A0=20(#864)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Refactor: AquesTalk風記法パース規則コメント

* 提案

* ミス

* Update voicevox_engine/tts_pipeline/kana_parser.py

---------

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 voicevox_engine/tts_pipeline/kana_parser.py | 36 +++++++++++++--------
 1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/voicevox_engine/tts_pipeline/kana_parser.py b/voicevox_engine/tts_pipeline/kana_parser.py
index 430960156..87a4624a8 100644
--- a/voicevox_engine/tts_pipeline/kana_parser.py
+++ b/voicevox_engine/tts_pipeline/kana_parser.py
@@ -1,6 +1,17 @@
 """
 「AquesTalk風記法」を実装した AquesTalk風記法テキスト <-> アクセント句系列 変換。
-記法定義: `https://github.com/VOICEVOX/voicevox_engine/blob/master/README.md#読み方を-aquestalk風記法で取得修正するサンプルコード` # noqa
+
+記法の規則は以下の通り。
+
+- 読みはカタカナのみ
+- `/` で区切り
+- `、` で無音付き区切り
+- `_` で無声化
+- `'` でアクセント位置
+- `？` で疑問文
+- アクセント位置はちょうど１つ
+
+NOTE: ユーザー向け案内 `https://github.com/VOICEVOX/voicevox_engine/blob/master/README.md#読み方を-aquestalk風記法で取得修正するサンプルコード` # noqa
 """
 
 from typing import List, Optional
@@ -30,7 +41,7 @@
         is_interrogative=False,
     )
     if vowel in ["a", "i", "u", "e", "o"]:
-        # 手前に`_`を入れると無声化
+        # 「`_` で無声化」の実装
         # 例: "_ホ" -> "hO"
         _text2mora_with_unvoice[_UNVOICE_SYMBOL + text] = Mora(
             text=text,
@@ -69,13 +80,14 @@ def _text_to_accent_phrase(phrase: str) -> AccentPhrase:
     while base_index < len(phrase):
         outer_loop += 1
 
-        # `'`の手前がアクセント位置
+        # 「`'` でアクセント位置」の実装
         if phrase[base_index] == _ACCENT_SYMBOL:
+            # 「アクセント位置はちょうど１つ」の実装
             if len(moras) == 0:
                 raise ParseKanaError(ParseKanaErrorCode.ACCENT_TOP, text=phrase)
-            # すでにアクセント位置がある場合はエラー
             if accent_index is not None:
                 raise ParseKanaError(ParseKanaErrorCode.ACCENT_TWICE, text=phrase)
+
             accent_index = len(moras)
             base_index += 1
             continue
@@ -89,8 +101,6 @@ def _text_to_accent_phrase(phrase: str) -> AccentPhrase:
                 break
             stack += phrase[watch_index]
             if stack in _text2mora_with_unvoice:
-                # より長い要素からなるモーラが見つかれば上書き（longest match）
-                # 例: phrase "キャ" -> "キ" 検出 -> "キャ" 検出/上書き -> Mora("キャ")
                 matched_text = stack
         if matched_text is None:
             raise ParseKanaError(ParseKanaErrorCode.UNKNOWN_TEXT, text=stack)
@@ -137,7 +147,7 @@ def parse_kana(text: str) -> List[AccentPhrase]:
                 )
             phrase_base = i + 1
 
-            # アクセント句末に`？`で疑問文
+            # 「`？` で疑問文」の実装
             is_interrogative = _WIDE_INTERROGATION_MARK in phrase
             if is_interrogative:
                 if _WIDE_INTERROGATION_MARK in phrase[:-1]:
@@ -149,7 +159,7 @@ def parse_kana(text: str) -> List[AccentPhrase]:
 
             accent_phrase: AccentPhrase = _text_to_accent_phrase(phrase)
 
-            # `、`で無音区間を挿入
+            # 「`、` で無音付き区切り」の実装
             if i < len(text) and text[i] == _PAUSE_DELIMITER:
                 accent_phrase.pause_mora = Mora(
                     text="、",
@@ -182,23 +192,23 @@ def create_kana(accent_phrases: List[AccentPhrase]) -> str:
     # アクセント句を先頭から逐次パースし、`text`末尾にAquesTalk風記法の文字を都度追加（ループ）
     for i, phrase in enumerate(accent_phrases):
         for j, mora in enumerate(phrase.moras):
-            # Rule3: "カナの手前に`_`を入れるとそのカナは無声化される"
+            # 「`_` で無声化」の実装
             if mora.vowel in ["A", "I", "U", "E", "O"]:
                 text += _UNVOICE_SYMBOL
             text += mora.text
-            # `'`でアクセント位置
+            # 「`'` でアクセント位置」の実装
             if j + 1 == phrase.accent:
                 text += _ACCENT_SYMBOL
 
-        # Rule5: "アクセント句末に`？`(全角)を入れることにより疑問文の発音ができる"
+        # 「`？` で疑問文」の実装
         if phrase.is_interrogative:
             text += _WIDE_INTERROGATION_MARK
 
         if i < len(accent_phrases) - 1:
+            # 「`/` で区切り」の実装
             if phrase.pause_mora is None:
-                # アクセント句区切り
                 text += _NOPAUSE_DELIMITER
+            # 「`、` で無音付き区切り」の実装
             else:
-                # 無音でアクセント句区切り
                 text += _PAUSE_DELIMITER
     return text

From 4ef4218822de666ea1272338061a14ffd8f690a3 Mon Sep 17 00:00:00 2001
From: sabonerune <102559104+sabonerune@users.noreply.github.com>
Date: Mon, 18 Dec 2023 04:33:15 +0900
Subject: [PATCH 034/177] =?UTF-8?q?BLD:=20PyInstaller=E3=82=92v6=E3=81=B8?=
 =?UTF-8?q?=E6=9B=B4=E6=96=B0=20(#857)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 .github/workflows/build.yml             |  8 +--
 poetry.lock                             | 61 ++++++++---------
 pyproject.toml                          |  6 +-
 requirements-dev.txt                    | 10 +--
 run.py                                  |  3 +-
 run.spec                                | 88 +++++++++++++++----------
 voicevox_engine/user_dict.py            |  4 +-
 voicevox_engine/utility/__init__.py     |  5 +-
 voicevox_engine/utility/path_utility.py | 51 ++++++++++----
 9 files changed, 140 insertions(+), 96 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 218efd2c4..1054b5e47 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -464,10 +464,10 @@ jobs:
             LIBONNXRUNTIME_PATH=download/onnxruntime/lib/libonnxruntime.so
           fi
 
-          CORE_MODEL_DIR_PATH="download/core/model" \
-          LIBCORE_PATH="$LIBCORE_PATH" \
-          LIBONNXRUNTIME_PATH="$LIBONNXRUNTIME_PATH" \
-          pyinstaller --noconfirm run.spec
+          pyinstaller --noconfirm run.spec -- \
+            --libcore_path="$LIBCORE_PATH" \
+            --libonnxruntime_path="$LIBONNXRUNTIME_PATH" \
+            --core_model_dir_path="download/core/model"
 
       - name: Gather DLL dependencies to dist/run/ (Windows)
         if: startsWith(matrix.os, 'windows-')
diff --git a/poetry.lock b/poetry.lock
index 6e7957c9a..128c9d660 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -13,13 +13,13 @@ files = [
 
 [[package]]
 name = "altgraph"
-version = "0.17.3"
+version = "0.17.4"
 description = "Python graph (network) package"
 optional = false
 python-versions = "*"
 files = [
-    {file = "altgraph-0.17.3-py2.py3-none-any.whl", hash = "sha256:c8ac1ca6772207179ed8003ce7687757c04b0b71536f81e2ac5755c6226458fe"},
-    {file = "altgraph-0.17.3.tar.gz", hash = "sha256:ad33358114df7c9416cdb8fa1eaa5852166c505118717021c6a8c7c7abbd03dd"},
+    {file = "altgraph-0.17.4-py2.py3-none-any.whl", hash = "sha256:642743b4750de17e655e6711601b077bc6598dbfa3ba5fa2b2a35ce12b508dff"},
+    {file = "altgraph-0.17.4.tar.gz", hash = "sha256:1b5afbb98f6c4dcadb2e2ae6ab9fa994bbb8c1d75f4fa96d340f9437ae454406"},
 ]
 
 [[package]]
@@ -1020,13 +1020,13 @@ testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)",
 
 [[package]]
 name = "macholib"
-version = "1.16.2"
+version = "1.16.3"
 description = "Mach-O header analysis and editing"
 optional = false
 python-versions = "*"
 files = [
-    {file = "macholib-1.16.2-py2.py3-none-any.whl", hash = "sha256:44c40f2cd7d6726af8fa6fe22549178d3a4dfecc35a9cd15ea916d9c83a688e0"},
-    {file = "macholib-1.16.2.tar.gz", hash = "sha256:557bbfa1bb255c20e9abafe7ed6cd8046b48d9525db2f9b77d3122a63a2a8bf8"},
+    {file = "macholib-1.16.3-py2.py3-none-any.whl", hash = "sha256:0e315d7583d38b8c77e815b1ecbdbf504a8258d8b3e17b61165c6feb60d18f2c"},
+    {file = "macholib-1.16.3.tar.gz", hash = "sha256:07ae9e15e8e4cd9a788013d81f5908b3609aa76f9b1421bae9c4d7606ec86a30"},
 ]
 
 [package.dependencies]
@@ -1612,46 +1612,47 @@ files = [
 
 [[package]]
 name = "pyinstaller"
-version = "5.13.2"
+version = "6.2.0"
 description = "PyInstaller bundles a Python application and all its dependencies into a single package."
 optional = false
-python-versions = "<3.13,>=3.7"
+python-versions = "<3.13,>=3.8"
 files = [
-    {file = "pyinstaller-5.13.2-py3-none-macosx_10_13_universal2.whl", hash = "sha256:16cbd66b59a37f4ee59373a003608d15df180a0d9eb1a29ff3bfbfae64b23d0f"},
-    {file = "pyinstaller-5.13.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8f6dd0e797ae7efdd79226f78f35eb6a4981db16c13325e962a83395c0ec7420"},
-    {file = "pyinstaller-5.13.2-py3-none-manylinux2014_i686.whl", hash = "sha256:65133ed89467edb2862036b35d7c5ebd381670412e1e4361215e289c786dd4e6"},
-    {file = "pyinstaller-5.13.2-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:7d51734423685ab2a4324ab2981d9781b203dcae42839161a9ee98bfeaabdade"},
-    {file = "pyinstaller-5.13.2-py3-none-manylinux2014_s390x.whl", hash = "sha256:2c2fe9c52cb4577a3ac39626b84cf16cf30c2792f785502661286184f162ae0d"},
-    {file = "pyinstaller-5.13.2-py3-none-manylinux2014_x86_64.whl", hash = "sha256:c63ef6133eefe36c4b2f4daf4cfea3d6412ece2ca218f77aaf967e52a95ac9b8"},
-    {file = "pyinstaller-5.13.2-py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:aadafb6f213549a5906829bb252e586e2cf72a7fbdb5731810695e6516f0ab30"},
-    {file = "pyinstaller-5.13.2-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:b2e1c7f5cceb5e9800927ddd51acf9cc78fbaa9e79e822c48b0ee52d9ce3c892"},
-    {file = "pyinstaller-5.13.2-py3-none-win32.whl", hash = "sha256:421cd24f26144f19b66d3868b49ed673176765f92fa9f7914cd2158d25b6d17e"},
-    {file = "pyinstaller-5.13.2-py3-none-win_amd64.whl", hash = "sha256:ddcc2b36052a70052479a9e5da1af067b4496f43686ca3cdda99f8367d0627e4"},
-    {file = "pyinstaller-5.13.2-py3-none-win_arm64.whl", hash = "sha256:27cd64e7cc6b74c5b1066cbf47d75f940b71356166031deb9778a2579bb874c6"},
-    {file = "pyinstaller-5.13.2.tar.gz", hash = "sha256:c8e5d3489c3a7cc5f8401c2d1f48a70e588f9967e391c3b06ddac1f685f8d5d2"},
+    {file = "pyinstaller-6.2.0-py3-none-macosx_10_13_universal2.whl", hash = "sha256:a1adbd3cf25dc90926d783eae0f444d65cdfecc7bcdf6da522c3ae3ff47b4c25"},
+    {file = "pyinstaller-6.2.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:29d164394f1e949072f78a64c1e040f1c47b7f4aff08514c7666a031c8b44996"},
+    {file = "pyinstaller-6.2.0-py3-none-manylinux2014_i686.whl", hash = "sha256:ba602a38d7403de89c38b8956b221ce6de0280730d269bab522492fcad82ee33"},
+    {file = "pyinstaller-6.2.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:ebac06d99b80d2035594c3cc2fb5f2612d86289edd0510dbcbeb20a873f51d5a"},
+    {file = "pyinstaller-6.2.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:fcfabc0ff1d38a4262c051dea3fdc1f7f106405c1f1b491b4c79cd28df19cab6"},
+    {file = "pyinstaller-6.2.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:104430686149b2f1c135b2c17aa2967c85d54ef77dc92feb4e179ec846c0c467"},
+    {file = "pyinstaller-6.2.0-py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:e87fd60292b53bb9965cb5a84122875469a2bd475fd0d0db0052a3f1be351f75"},
+    {file = "pyinstaller-6.2.0-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:8ec9d6c98972bb922cedb16a6638257aa66e5deadd79e2953f3464696237c413"},
+    {file = "pyinstaller-6.2.0-py3-none-win32.whl", hash = "sha256:e5561e9a9b946d835c8dbc11ae4c16cc21e62bc77d10cc043406dc2992dfb4c6"},
+    {file = "pyinstaller-6.2.0-py3-none-win_amd64.whl", hash = "sha256:3b586196277c4c54b69880650984c39c28bb6258c2b4b64200032e6ac69d53a0"},
+    {file = "pyinstaller-6.2.0-py3-none-win_arm64.whl", hash = "sha256:d0c87b605bf13c3a04dfaa1d2fa7cd36765b8137000eeadccba865e1d6a19bf0"},
+    {file = "pyinstaller-6.2.0.tar.gz", hash = "sha256:1ce77043929bf525be38289d78feecde0fcf15506215eda6500176a8715c5047"},
 ]
 
 [package.dependencies]
 altgraph = "*"
 macholib = {version = ">=1.8", markers = "sys_platform == \"darwin\""}
+packaging = ">=22.0"
 pefile = {version = ">=2022.5.30", markers = "sys_platform == \"win32\""}
 pyinstaller-hooks-contrib = ">=2021.4"
 pywin32-ctypes = {version = ">=0.2.1", markers = "sys_platform == \"win32\""}
 setuptools = ">=42.0.0"
 
 [package.extras]
-encryption = ["tinyaes (>=1.0.0)"]
+completion = ["argcomplete"]
 hook-testing = ["execnet (>=1.5.0)", "psutil", "pytest (>=2.7.3)"]
 
 [[package]]
 name = "pyinstaller-hooks-contrib"
-version = "2023.7"
+version = "2023.10"
 description = "Community maintained hooks for PyInstaller"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "pyinstaller-hooks-contrib-2023.7.tar.gz", hash = "sha256:0c436a4c3506020e34116a8a7ddfd854c1ad6ddca9a8cd84500bd6e69c9e68f9"},
-    {file = "pyinstaller_hooks_contrib-2023.7-py2.py3-none-any.whl", hash = "sha256:3c10df14c0f71ab388dfbf1625375b087e7330d9444cbfd2b310ba027fa0cff0"},
+    {file = "pyinstaller-hooks-contrib-2023.10.tar.gz", hash = "sha256:4b4a998036abb713774cb26534ca06b7e6e09e4c628196017a10deb11a48747f"},
+    {file = "pyinstaller_hooks_contrib-2023.10-py2.py3-none-any.whl", hash = "sha256:6dc1786a8f452941245d5bb85893e2a33632ebdcbc4c23eea41f2ee08281b0c0"},
 ]
 
 [[package]]
@@ -2048,19 +2049,19 @@ files = [
 
 [[package]]
 name = "setuptools"
-version = "68.1.2"
+version = "69.0.2"
 description = "Easily download, build, install, upgrade, and uninstall Python packages"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "setuptools-68.1.2-py3-none-any.whl", hash = "sha256:3d8083eed2d13afc9426f227b24fd1659489ec107c0e86cec2ffdde5c92e790b"},
-    {file = "setuptools-68.1.2.tar.gz", hash = "sha256:3d4dfa6d95f1b101d695a6160a7626e15583af71a5f52176efa5d39a054d475d"},
+    {file = "setuptools-69.0.2-py3-none-any.whl", hash = "sha256:1e8fdff6797d3865f37397be788a4e3cba233608e9b509382a2777d25ebde7f2"},
+    {file = "setuptools-69.0.2.tar.gz", hash = "sha256:735896e78a4742605974de002ac60562d286fa8051a7e2299445e8e8fbb01aa6"},
 ]
 
 [package.extras]
-docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5,<=7.1.2)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
+docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
 testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]
-testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"]
+testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.1)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"]
 
 [[package]]
 name = "shellingham"
@@ -2431,4 +2432,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = "~3.11"
-content-hash = "b3ef9f8c5445b3e481d666a4a3b6a73d44fa1159646cf64f480a19aa1999d0ee"
+content-hash = "eb3e0209e98c6df8760ef8dae1ccbd175af6a28e09ea5efc5e84b566b6c5b8d0"
diff --git a/pyproject.toml b/pyproject.toml
index 88926aa0a..c69cf96af 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,14 +51,14 @@ pyyaml = "^6.0"
 pyworld = "^0.3.0"
 requests = "^2.28.1"
 jinja2 = "^3.1.2"
-pyopenjtalk = {git = "https://github.com/VOICEVOX/pyopenjtalk", rev = "b35fc89fe42948a28e33aed886ea145a51113f88"}
+pyopenjtalk = { git = "https://github.com/VOICEVOX/pyopenjtalk", rev = "b35fc89fe42948a28e33aed886ea145a51113f88" }
 semver = "^3.0.0"
 platformdirs = "^3.10.0"
 soxr = "^0.3.6"
 
 [tool.poetry.group.dev.dependencies]
 cython = "^0.29.34,>=0.29.33" # NOTE: for Python 3.11
-pyinstaller = "^5.13"
+pyinstaller = "^6.2.0"
 pre-commit = "^2.16.0"
 atomicwrites = "^1.4.0"
 colorama = "^0.4.4"
@@ -74,7 +74,7 @@ mypy = "^1.6.0"
 pytest = "^6.2.5"
 coveralls = "^3.2.0"
 poetry = "^1.3.1"
-httpx = "^0.25.0" # NOTE: required by fastapi.testclient.TestClient
+httpx = "^0.25.0"          # NOTE: required by fastapi.testclient.TestClient
 
 [tool.poetry.group.license.dependencies]
 pip-licenses = "^4.2.0"
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 51806c4da..a42435195 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,5 +1,5 @@
 aiofiles==0.7.0 ; python_version >= "3.11" and python_version < "3.12"
-altgraph==0.17.3 ; python_version >= "3.11" and python_version < "3.12"
+altgraph==0.17.4 ; python_version >= "3.11" and python_version < "3.12"
 anyio==3.7.1 ; python_version >= "3.11" and python_version < "3.12"
 asgiref==3.7.2 ; python_version >= "3.11" and python_version < "3.12"
 atomicwrites==1.4.1 ; python_version >= "3.11" and python_version < "3.12"
@@ -30,7 +30,7 @@ jeepney==0.8.0 ; python_version >= "3.11" and python_version < "3.12" and sys_pl
 jinja2==3.1.2 ; python_version >= "3.11" and python_version < "3.12"
 jsonschema==4.17.3 ; python_version >= "3.11" and python_version < "3.12"
 keyring==24.2.0 ; python_version >= "3.11" and python_version < "3.12"
-macholib==1.16.2 ; python_version >= "3.11" and python_version < "3.12" and sys_platform == "darwin"
+macholib==1.16.3 ; python_version >= "3.11" and python_version < "3.12" and sys_platform == "darwin"
 markupsafe==2.1.3 ; python_version >= "3.11" and python_version < "3.12"
 more-itertools==10.1.0 ; python_version >= "3.11" and python_version < "3.12"
 msgpack==1.0.5 ; python_version >= "3.11" and python_version < "3.12"
@@ -48,8 +48,8 @@ pre-commit==2.21.0 ; python_version >= "3.11" and python_version < "3.12"
 ptyprocess==0.7.0 ; python_version >= "3.11" and python_version < "3.12"
 pycparser==2.21 ; python_version >= "3.11" and python_version < "3.12"
 pydantic==1.10.12 ; python_version >= "3.11" and python_version < "3.12"
-pyinstaller-hooks-contrib==2023.7 ; python_version >= "3.11" and python_version < "3.12"
-pyinstaller==5.13.2 ; python_version >= "3.11" and python_version < "3.12"
+pyinstaller-hooks-contrib==2023.10 ; python_version >= "3.11" and python_version < "3.12"
+pyinstaller==6.2.0 ; python_version >= "3.11" and python_version < "3.12"
 pyopenjtalk @ git+https://github.com/VOICEVOX/pyopenjtalk@b35fc89fe42948a28e33aed886ea145a51113f88 ; python_version >= "3.11" and python_version < "3.12"
 pyproject-hooks==1.0.0 ; python_version >= "3.11" and python_version < "3.12"
 pyrsistent==0.19.3 ; python_version >= "3.11" and python_version < "3.12"
@@ -62,7 +62,7 @@ requests-toolbelt==1.0.0 ; python_version >= "3.11" and python_version < "3.12"
 requests==2.31.0 ; python_version >= "3.11" and python_version < "3.12"
 secretstorage==3.3.3 ; python_version >= "3.11" and python_version < "3.12" and sys_platform == "linux"
 semver==3.0.1 ; python_version >= "3.11" and python_version < "3.12"
-setuptools==68.1.2 ; python_version >= "3.11" and python_version < "3.12"
+setuptools==69.0.2 ; python_version >= "3.11" and python_version < "3.12"
 shellingham==1.5.3 ; python_version >= "3.11" and python_version < "3.12"
 six==1.16.0 ; python_version >= "3.11" and python_version < "3.12"
 sniffio==1.3.0 ; python_version >= "3.11" and python_version < "3.12"
diff --git a/run.py b/run.py
index aa3c3afec..bfb8cba14 100644
--- a/run.py
+++ b/run.py
@@ -82,6 +82,7 @@
     engine_root,
     get_latest_core_version,
     get_save_dir,
+    internal_root,
 )
 
 
@@ -209,7 +210,7 @@ async def block_origin_middleware(request: Request, call_next):
 
     metas_store = MetasStore(root_dir / "speaker_info")
 
-    setting_ui_template = Jinja2Templates(directory=engine_root() / "ui_template")
+    setting_ui_template = Jinja2Templates(directory=internal_root() / "ui_template")
 
     # キャッシュを有効化
     # モジュール側でlru_cacheを指定するとキャッシュを制御しにくいため、HTTPサーバ側で指定する
diff --git a/run.spec b/run.spec
index 970f2adfa..65642c61d 100644
--- a/run.spec
+++ b/run.spec
@@ -1,49 +1,42 @@
 # -*- mode: python ; coding: utf-8 -*-
 # このファイルはPyInstallerによって自動生成されたもので、それをカスタマイズして使用しています。
+from argparse import ArgumentParser
+from pathlib import Path
+from shutil import copy2, copytree
+
 from PyInstaller.utils.hooks import collect_data_files
-import os
+
+parser = ArgumentParser()
+parser.add_argument("--libcore_path", type=Path)
+parser.add_argument("--libonnxruntime_path", type=Path)
+parser.add_argument("--core_model_dir_path", type=Path)
+options = parser.parse_args()
+
+libonnxruntime_path: Path | None = options.libonnxruntime_path
+if libonnxruntime_path is not None and not libonnxruntime_path.is_file():
+    raise Exception(f"libonnxruntime_path: {libonnxruntime_path} is not file")
+
+libcore_path: Path | None = options.libcore_path
+if libcore_path is not None and not libcore_path.is_file():
+    raise Exception(f"libcore_path: {libcore_path} is not file")
+
+core_model_dir_path: Path | None = options.core_model_dir_path
+if core_model_dir_path is not None and not core_model_dir_path.is_dir():
+    raise Exception(f"core_model_dir_path: {core_model_dir_path} is not dir")
 
 datas = [
-    ('engine_manifest_assets', 'engine_manifest_assets'),
-    ('speaker_info', 'speaker_info'),
-    ('engine_manifest.json', '.'),
-    ('default.csv', '.'),
-    ('licenses.json', '.'),
-    ('presets.yaml', '.'),
-    ('ui_template', 'ui_template'),
+    ("default.csv", "."),
+    ("presets.yaml", "."),
+    ("ui_template", "ui_template"),
 ]
-datas += collect_data_files('pyopenjtalk')
-
-core_model_dir_path = os.environ.get('CORE_MODEL_DIR_PATH')
-if core_model_dir_path:
-    print('CORE_MODEL_DIR_PATH is found:', core_model_dir_path)
-    if not os.path.isdir(core_model_dir_path):
-        raise Exception("CORE_MODEL_DIR_PATH was found, but it is not directory!")
-    datas += [(core_model_dir_path, "model")]
-
-# コアとONNX Runtimeはバイナリであるが、`binaries`に加えると
-# 依存関係のパスがPyInstallerに書き換えらるので、`datas`に加える
-# 参考: https://github.com/VOICEVOX/voicevox_engine/pull/446#issuecomment-1210052318
-libcore_path = os.environ.get('LIBCORE_PATH')
-if libcore_path:
-    print('LIBCORE_PATH is found:', libcore_path)
-    if not os.path.isfile(libcore_path):
-        raise Exception("LIBCORE_PATH was found, but it is not file!")
-    datas += [(libcore_path, ".")]
-
-libonnxruntime_path = os.environ.get('LIBONNXRUNTIME_PATH')
-if libonnxruntime_path:
-    print('LIBONNXRUNTIME_PATH is found:', libonnxruntime_path)
-    if not os.path.isfile(libonnxruntime_path):
-        raise Exception("LIBCORE_PATH was found, but it is not file!")
-    datas += [(libonnxruntime_path, ".")]
+datas += collect_data_files("pyopenjtalk")
 
 
 block_cipher = None
 
 
 a = Analysis(
-    ['run.py'],
+    ["run.py"],
     pathex=[],
     binaries=[],
     datas=datas,
@@ -65,7 +58,7 @@ exe = EXE(
     a.scripts,
     [],
     exclude_binaries=True,
-    name='run',
+    name="run",
     debug=False,
     bootloader_ignore_signals=False,
     strip=False,
@@ -76,6 +69,7 @@ exe = EXE(
     target_arch=None,
     codesign_identity=None,
     entitlements_file=None,
+    contents_directory="engine_internal",
 )
 
 coll = COLLECT(
@@ -86,5 +80,27 @@ coll = COLLECT(
     strip=False,
     upx=True,
     upx_exclude=[],
-    name='run',
+    name="run",
 )
+
+# 実行ファイル作成後の処理
+
+# 実行ファイルと同じrootディレクトリ
+target_dir = Path(DISTPATH) / "run"
+
+# 動的ライブラリをコピー
+if libonnxruntime_path is not None:
+    copy2(libonnxruntime_path, target_dir)
+if libcore_path is not None:
+    copy2(libcore_path, target_dir)
+if core_model_dir_path is not None:
+    copytree(core_model_dir_path, target_dir / "model")
+
+# 互換性維持のために必要なファイルをコピー
+license_file_path = Path("licenses.json")
+if license_file_path.is_file():
+    copy2("licenses.json", target_dir)
+
+copytree("speaker_info", target_dir / "speaker_info")
+copy2("engine_manifest.json", target_dir)
+copytree("engine_manifest_assets", target_dir / "engine_manifest_assets")
diff --git a/voicevox_engine/user_dict.py b/voicevox_engine/user_dict.py
index abdf39f48..a64cb2363 100644
--- a/voicevox_engine/user_dict.py
+++ b/voicevox_engine/user_dict.py
@@ -13,9 +13,9 @@
 
 from .model import UserDictWord, WordTypes
 from .part_of_speech_data import MAX_PRIORITY, MIN_PRIORITY, part_of_speech_data
-from .utility import engine_root, get_save_dir, mutex_wrapper
+from .utility import get_save_dir, internal_root, mutex_wrapper
 
-root_dir = engine_root()
+root_dir = internal_root()
 save_dir = get_save_dir()
 
 if not save_dir.is_dir():
diff --git a/voicevox_engine/utility/__init__.py b/voicevox_engine/utility/__init__.py
index d40fea3e6..7ed74f118 100644
--- a/voicevox_engine/utility/__init__.py
+++ b/voicevox_engine/utility/__init__.py
@@ -5,7 +5,7 @@
 )
 from .core_version_utility import get_latest_core_version, parse_core_version
 from .mutex_utility import mutex_wrapper
-from .path_utility import delete_file, engine_root, get_save_dir
+from .path_utility import delete_file, engine_root, get_save_dir, internal_root
 
 __all__ = [
     "ConnectBase64WavesException",
@@ -13,8 +13,9 @@
     "decode_base64_waves",
     "get_latest_core_version",
     "parse_core_version",
+    "mutex_wrapper",
     "delete_file",
     "engine_root",
     "get_save_dir",
-    "mutex_wrapper",
+    "internal_root",
 ]
diff --git a/voicevox_engine/utility/path_utility.py b/voicevox_engine/utility/path_utility.py
index 7c46ad40b..6c5c36ca2 100644
--- a/voicevox_engine/utility/path_utility.py
+++ b/voicevox_engine/utility/path_utility.py
@@ -2,35 +2,60 @@
 import sys
 import traceback
 from pathlib import Path
+from typing import Literal
 
 from platformdirs import user_data_dir
 
 
+def _runtime_type() -> Literal["nuitka", "pyinstaller", "python"]:
+    """
+    コンパイルに使用したライブラリ名を返す。
+    コンパイルしていない場合は"python"を返す。
+    """
+    # nuitkaビルドをした際はグローバルに__compiled__が含まれる
+    if "__compiled__" in globals():
+        return "nuitka"
+
+    # pyinstallerでビルドをした際はsys.frozenが設定される
+    elif getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"):
+        return "pyinstaller"
+
+    return "python"
+
+
 def engine_root() -> Path:
-    if is_development():
-        root_dir = Path(__file__).parents[2]
+    """
+    開発環境ではリポジトリのルートディレクトリを返す。
+    コンパイル後は実行ファイルがあるディレクトリを返す。
+    """
+    runtime = _runtime_type()
+    if runtime == "nuitka":
+        root_dir = Path(sys.argv[0]).parent
+
+    elif runtime == "pyinstaller":
+        root_dir = Path(sys.executable).parent
 
-    # Nuitka/Pyinstallerでビルドされている場合
     else:
-        root_dir = Path(sys.argv[0]).parent
+        root_dir = Path(__file__).parents[2]
 
     return root_dir.resolve(strict=True)
 
 
+def internal_root() -> Path:
+    """
+    コンパイル時に収集された実行ファイル内部用のルートディレクトリを返す。
+    開発環境ではリポジトリのルートディレクトリを返す。
+    """
+    root_dir = Path(__file__).parents[2]
+    return root_dir.resolve(strict=True)
+
+
 def is_development() -> bool:
     """
     開発版かどうか判定する関数
     Nuitka/Pyinstallerでコンパイルされていない場合は開発環境とする。
     """
-    # nuitkaビルドをした際はグローバルに__compiled__が含まれる
-    if "__compiled__" in globals():
-        return False
-
-    # pyinstallerでビルドをした際はsys.frozenが設定される
-    elif getattr(sys, "frozen", False):
-        return False
-
-    return True
+    return _runtime_type() == "python"
 
 
 def get_save_dir():

From d0b8fffa56de30fa93bb67afbc9a48b3e54b7965 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Mon, 18 Dec 2023 06:30:32 +0900
Subject: [PATCH 035/177] =?UTF-8?q?hotfix:=20NumPy=20=E3=81=AE=20deprecate?=
 =?UTF-8?q?d=20=E3=81=AA=20cast=20(#888)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 test/test_synthesis_engine.py      | 10 +++++-----
 test/test_synthesis_engine_base.py | 10 +++++-----
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/test/test_synthesis_engine.py b/test/test_synthesis_engine.py
index 84064bdea..8c01d18ef 100644
--- a/test/test_synthesis_engine.py
+++ b/test/test_synthesis_engine.py
@@ -65,7 +65,7 @@ def yukarin_s_mock(length: int, phoneme_list: numpy.ndarray, style_id: numpy.nda
     result = []
     # mockとしての適当な処理、特に意味はない
     for i in range(length):
-        result.append(float(phoneme_list[i] * 0.5 + style_id))
+        result.append((phoneme_list[i] * 0.5 + style_id).item())
     return numpy.array(result)
 
 
@@ -83,7 +83,7 @@ def yukarin_sa_mock(
     # mockとしての適当な処理、特に意味はない
     for i in range(length):
         result.append(
-            float(
+            (
                 (
                     vowel_phoneme_list[0][i]
                     + consonant_phoneme_list[0][i]
@@ -94,7 +94,7 @@ def yukarin_sa_mock(
                 )
                 * 0.5
                 + style_id
-            )
+            ).item()
         )
     return numpy.array(result)[numpy.newaxis]
 
@@ -112,10 +112,10 @@ def decode_mock(
         # decode forwardはデータサイズがlengthの256倍になるのでとりあえず256回データをresultに入れる
         for _ in range(256):
             result.append(
-                float(
+                (
                     f0[i][0] * (numpy.where(phoneme[i] == 1)[0] / phoneme_size)
                     + style_id
-                )
+                ).item()
             )
     return numpy.array(result)
 
diff --git a/test/test_synthesis_engine_base.py b/test/test_synthesis_engine_base.py
index 7fa8fd676..ecee4df66 100644
--- a/test/test_synthesis_engine_base.py
+++ b/test/test_synthesis_engine_base.py
@@ -12,7 +12,7 @@ def yukarin_s_mock(length: int, phoneme_list: numpy.ndarray, style_id: numpy.nda
     result = []
     # mockとしての適当な処理、特に意味はない
     for i in range(length):
-        result.append(round(float(phoneme_list[i] * 0.0625 + style_id), 2))
+        result.append(round((phoneme_list[i] * 0.0625 + style_id).item(), 2))
     return numpy.array(result)
 
 
@@ -31,7 +31,7 @@ def yukarin_sa_mock(
     for i in range(length):
         result.append(
             round(
-                float(
+                (
                     (
                         vowel_phoneme_list[0][i]
                         + consonant_phoneme_list[0][i]
@@ -42,7 +42,7 @@ def yukarin_sa_mock(
                     )
                     * 0.0625
                     + style_id
-                ),
+                ).item(),
                 2,
             )
         )
@@ -62,10 +62,10 @@ def decode_mock(
         # decode forwardはデータサイズがlengthの256倍になるのでとりあえず256回データをresultに入れる
         for _ in range(256):
             result.append(
-                float(
+                (
                     f0[i][0] * (numpy.where(phoneme[i] == 1)[0] / phoneme_size)
                     + style_id
-                )
+                ).item()
             )
     return numpy.array(result)
 

From b6a04775b91a5e1e3984e41c51ccaf9676c3882c Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Mon, 18 Dec 2023 06:32:55 +0900
Subject: [PATCH 036/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`=5Fsynthesis=5Fi?=
 =?UTF-8?q?mpl`=20=E5=89=8D=E5=87=A6=E7=90=86/=E5=BE=8C=E5=87=A6=E7=90=86?=
 =?UTF-8?q?=E3=81=AE=E9=96=A2=E6=95=B0=E5=8C=96=20(#873)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 test/test_synthesis_engine.py              | 23 +++------
 voicevox_engine/tts_pipeline/tts_engine.py | 58 ++++++++++++++++------
 2 files changed, 49 insertions(+), 32 deletions(-)

diff --git a/test/test_synthesis_engine.py b/test/test_synthesis_engine.py
index 8c01d18ef..416a18feb 100644
--- a/test/test_synthesis_engine.py
+++ b/test/test_synthesis_engine.py
@@ -26,6 +26,7 @@
     calc_frame_pitch,
     mora_phoneme_list,
     pre_process,
+    query_to_decoder_feature,
     split_mora,
     to_flatten_moras,
     to_flatten_phonemes,
@@ -446,8 +447,8 @@ def test_calc_frame_phoneme():
     assert numpy.array_equal(frame_phoneme, true_frame_phoneme)
 
 
-def test_feat_to_framescale():
-    """Test Mora/Phonemefeature-to-framescaleFeature pipeline."""
+def test_query_to_decoder_feature():
+    """Test `query_to_decoder_feature`."""
     # Inputs
     accent_phrases = [
         AccentPhrase(
@@ -484,9 +485,9 @@ def test_feat_to_framescale():
     # phoneme
     #                     Pr  k   o   o  N  N pau  h   i   i   h   h  O Pt Pt Pt
     frame_phoneme_idxs = [0, 23, 30, 30, 4, 4, 0, 19, 21, 21, 19, 19, 5, 0, 0, 0]
-    true_frame_phoneme = numpy.zeros([n_frame, TRUE_NUM_PHONEME], dtype=numpy.float32)
+    true_phoneme = numpy.zeros([n_frame, TRUE_NUM_PHONEME], dtype=numpy.float32)
     for frame_idx, phoneme_idx in enumerate(frame_phoneme_idxs):
-        true_frame_phoneme[frame_idx, phoneme_idx] = 1.0
+        true_phoneme[frame_idx, phoneme_idx] = 1.0
     # Pitch
     #                   paw ko  N pau hi hO paw
     # frame_per_vowel = [1, 3,  2, 1, 3, 3, 3]
@@ -499,19 +500,9 @@ def test_feat_to_framescale():
     true_f0 = numpy.array(true1_f0 + true2_f0 + true3_f0, dtype=numpy.float32)
 
     # Outputs
-    flatten_moras = to_flatten_moras(query.accent_phrases)
-    flatten_moras = apply_prepost_silence(flatten_moras, query)
-    flatten_moras = apply_speed_scale(flatten_moras, query)
-    flatten_moras = apply_pitch_scale(flatten_moras, query)
-    flatten_moras = apply_intonation_scale(flatten_moras, query)
-
-    phoneme_data_list = to_flatten_phonemes(flatten_moras)
+    phoneme, f0 = query_to_decoder_feature(query)
 
-    frame_per_phoneme = calc_frame_per_phoneme(flatten_moras)
-    f0 = calc_frame_pitch(flatten_moras)
-    frame_phoneme = calc_frame_phoneme(phoneme_data_list, frame_per_phoneme)
-
-    assert numpy.array_equal(frame_phoneme, true_frame_phoneme)
+    assert numpy.array_equal(phoneme, true_phoneme)
     assert numpy.array_equal(f0, true_f0)
 
 
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index c05c122dd..bbd1d537e 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -359,6 +359,44 @@ def apply_output_stereo(wave: ndarray, query: AudioQuery) -> ndarray:
     return wave
 
 
+def query_to_decoder_feature(query: AudioQuery) -> tuple[ndarray, ndarray]:
+    """
+    音声合成用のクエリをデコーダー用特徴量へ変換する。
+    Parameters
+    ----------
+    query : AudioQuery
+        音声合成クエリ
+    Returns
+    -------
+    phoneme : ndarray
+        フレームごとの音素、shape=(Frame, Phoneme)
+    f0 : ndarray
+        フレームごとの基本周波数、shape=(Frame,)
+    """
+    flatten_moras = to_flatten_moras(query.accent_phrases)
+
+    flatten_moras = apply_prepost_silence(flatten_moras, query)
+    flatten_moras = apply_speed_scale(flatten_moras, query)
+    flatten_moras = apply_pitch_scale(flatten_moras, query)
+    flatten_moras = apply_intonation_scale(flatten_moras, query)
+
+    phoneme_data_list = to_flatten_phonemes(flatten_moras)
+
+    frame_per_phoneme = calc_frame_per_phoneme(flatten_moras)
+    f0 = calc_frame_pitch(flatten_moras)
+    phoneme = calc_frame_phoneme(phoneme_data_list, frame_per_phoneme)
+
+    return phoneme, f0
+
+
+def raw_wave_to_output_wave(query: AudioQuery, wave: ndarray, sr_wave: int) -> ndarray:
+    """生音声波形に音声合成用のクエリを適用して出力音声波形を生成する"""
+    wave = apply_volume_scale(wave, query)
+    wave = apply_output_sampling_rate(wave, sr_wave, query)
+    wave = apply_output_stereo(wave, query)
+    return wave
+
+
 class SynthesisEngine(SynthesisEngineBase):
     """音声合成器（core）の管理/実行/プロキシと音声合成フロー"""
 
@@ -614,31 +652,19 @@ def _synthesis_impl(self, query: AudioQuery, style_id: int):
         # モデルがロードされていない場合はロードする
         self.initialize_style_id_synthesis(style_id, skip_reinit=True)
 
-        flatten_moras = to_flatten_moras(query.accent_phrases)
-        flatten_moras = apply_prepost_silence(flatten_moras, query)
-        flatten_moras = apply_speed_scale(flatten_moras, query)
-        flatten_moras = apply_pitch_scale(flatten_moras, query)
-        flatten_moras = apply_intonation_scale(flatten_moras, query)
-
-        phoneme_data_list = to_flatten_phonemes(flatten_moras)
-
-        frame_per_phoneme = calc_frame_per_phoneme(flatten_moras)
-        f0 = calc_frame_pitch(flatten_moras)
-        phoneme = calc_frame_phoneme(phoneme_data_list, frame_per_phoneme)
+        phoneme, f0 = query_to_decoder_feature(query)
 
         # 今まで生成された情報をdecode_forwardにかけ、推論器によって音声波形を生成する
         with self.mutex:
-            wave = self.core.decode_forward(
+            raw_wave = self.core.decode_forward(
                 length=phoneme.shape[0],
                 phoneme_size=phoneme.shape[1],
                 f0=f0[:, numpy.newaxis],
                 phoneme=phoneme,
                 style_id=numpy.array(style_id, dtype=numpy.int64).reshape(-1),
             )
-            sr_wave = self.default_sampling_rate
+            sr_raw_wave = self.default_sampling_rate
 
-        wave = apply_volume_scale(wave, query)
-        wave = apply_output_sampling_rate(wave, sr_wave, query)
-        wave = apply_output_stereo(wave, query)
+        wave = raw_wave_to_output_wave(query, raw_wave, sr_raw_wave)
 
         return wave

From 92af86fb107853e6367bf682bfdc05611eb1df31 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Mon, 18 Dec 2023 06:39:07 +0900
Subject: [PATCH 037/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20Ojt=E3=83=89?=
 =?UTF-8?q?=E3=83=A1=E3=82=A4=E3=83=B3=E5=A4=89=E6=8F=9B=E5=88=87=E3=82=8A?=
 =?UTF-8?q?=E5=87=BA=E3=81=97=E3=81=A8=E9=9B=86=E7=B4=84=20(#889)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../tts_pipeline/tts_engine_base.py           | 79 +++++++++++--------
 1 file changed, 45 insertions(+), 34 deletions(-)

diff --git a/voicevox_engine/tts_pipeline/tts_engine_base.py b/voicevox_engine/tts_pipeline/tts_engine_base.py
index f4eeda039..3a846905e 100644
--- a/voicevox_engine/tts_pipeline/tts_engine_base.py
+++ b/voicevox_engine/tts_pipeline/tts_engine_base.py
@@ -6,7 +6,7 @@
 
 from ..model import AccentPhrase, AudioQuery, Mora
 from . import full_context_label
-from .full_context_label import extract_full_context_label
+from .full_context_label import Utterance, extract_full_context_label
 from .mora_list import openjtalk_mora2text
 
 
@@ -131,6 +131,47 @@ def full_context_label_moras_to_moras(
     ]
 
 
+def utterance_to_accent_phrases(utterance: Utterance) -> list[AccentPhrase]:
+    """Utteranceインスタンスをアクセント句系列へドメイン変換する"""
+    return [
+        AccentPhrase(
+            moras=full_context_label_moras_to_moras(accent_phrase.moras),
+            accent=accent_phrase.accent,
+            pause_mora=(
+                Mora(
+                    text="、",
+                    consonant=None,
+                    consonant_length=None,
+                    vowel="pau",
+                    vowel_length=0,
+                    pitch=0,
+                )
+                if (
+                    i_accent_phrase == len(breath_group.accent_phrases) - 1
+                    and i_breath_group != len(utterance.breath_groups) - 1
+                )
+                else None
+            ),
+            is_interrogative=accent_phrase.is_interrogative,
+        )
+        for i_breath_group, breath_group in enumerate(utterance.breath_groups)
+        for i_accent_phrase, accent_phrase in enumerate(breath_group.accent_phrases)
+    ]
+
+
+def test_to_accent_phrases(text: str) -> list[AccentPhrase]:
+    """日本語テキストからアクセント句系列を生成"""
+    if len(text.strip()) == 0:
+        return []
+
+    # 音素とアクセントの推定
+    utterance = extract_full_context_label(text)
+    if len(utterance.breath_groups) == 0:
+        return []
+
+    return utterance_to_accent_phrases(utterance)
+
+
 class SynthesisEngineBase(metaclass=ABCMeta):
     @property
     @abstractmethod
@@ -260,42 +301,12 @@ def create_accent_phrases(self, text: str, style_id: int) -> List[AccentPhrase]:
         accent_phrases : List[AccentPhrase]
             アクセント句系列
         """
-        if len(text.strip()) == 0:
-            return []
-
         # 音素とアクセントの推定
-        utterance = extract_full_context_label(text)
-        if len(utterance.breath_groups) == 0:
-            return []
+        accent_phrases = test_to_accent_phrases(text)
 
-        # Utterance -> List[AccentPharase] のキャスト & 音素長・モーラ音高の推定と更新
+        # 音素長・モーラ音高の推定と更新
         accent_phrases = self.replace_mora_data(
-            accent_phrases=[
-                AccentPhrase(
-                    moras=full_context_label_moras_to_moras(accent_phrase.moras),
-                    accent=accent_phrase.accent,
-                    pause_mora=(
-                        Mora(
-                            text="、",
-                            consonant=None,
-                            consonant_length=None,
-                            vowel="pau",
-                            vowel_length=0,
-                            pitch=0,
-                        )
-                        if (
-                            i_accent_phrase == len(breath_group.accent_phrases) - 1
-                            and i_breath_group != len(utterance.breath_groups) - 1
-                        )
-                        else None
-                    ),
-                    is_interrogative=accent_phrase.is_interrogative,
-                )
-                for i_breath_group, breath_group in enumerate(utterance.breath_groups)
-                for i_accent_phrase, accent_phrase in enumerate(
-                    breath_group.accent_phrases
-                )
-            ],
+            accent_phrases=accent_phrases,
             style_id=style_id,
         )
         return accent_phrases

From 43b4e72ae65ede20b555a6816b5aacce19f17b94 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Mon, 18 Dec 2023 06:40:14 +0900
Subject: [PATCH 038/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20OpenJTalk=20?=
 =?UTF-8?q?=E7=B3=BB=E3=81=AE=E4=B8=8D=E4=BD=BF=E7=94=A8=E3=83=A1=E3=82=BD?=
 =?UTF-8?q?=E3=83=83=E3=83=89=E5=89=8A=E9=99=A4=20(#890)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 test/test_full_context_label.py               | 14 -------------
 .../tts_pipeline/full_context_label.py        | 20 -------------------
 2 files changed, 34 deletions(-)

diff --git a/test/test_full_context_label.py b/test/test_full_context_label.py
index 0c9ce3ee0..153c8b35f 100644
--- a/test/test_full_context_label.py
+++ b/test/test_full_context_label.py
@@ -266,20 +266,6 @@ def test_labels(self):
             self.accent_phrase_hiho.labels, self.test_case_hello_hiho[11:19]
         )
 
-    def test_merge(self):
-        # 「こんにちはヒホです」
-        # 読点を無くしたものと同等
-        merged_accent_phrase = self.accent_phrase_hello.merge(self.accent_phrase_hiho)
-        self.assertEqual(merged_accent_phrase.accent, 5)
-        self.assertEqual(
-            " ".join([phoneme.phoneme for phoneme in merged_accent_phrase.phonemes]),
-            "k o N n i ch i w a h i h o d e s U",
-        )
-        self.assertEqual(
-            merged_accent_phrase.labels,
-            self.test_case_hello_hiho[1:10] + self.test_case_hello_hiho[11:19],
-        )
-
 
 class TestBreathGroup(TestBasePhonemes):
     def setUp(self) -> None:
diff --git a/voicevox_engine/tts_pipeline/full_context_label.py b/voicevox_engine/tts_pipeline/full_context_label.py
index 1e61a17b0..50517f819 100644
--- a/voicevox_engine/tts_pipeline/full_context_label.py
+++ b/voicevox_engine/tts_pipeline/full_context_label.py
@@ -267,26 +267,6 @@ def labels(self):
         """
         return [p.label for p in self.phonemes]
 
-    def merge(self, accent_phrase: "AccentPhrase"):
-        """
-        AccentPhraseを合成する
-        (このクラスが保持するmorasの後ろに、引数として渡されたAccentPhraseのmorasを合成する)
-        Parameters
-        ----------
-        accent_phrase : AccentPhrase
-            合成したいAccentPhraseを渡す
-
-        Returns
-        -------
-        accent_phrase : AccentPhrase
-            合成されたAccentPhraseを返す
-        """
-        return AccentPhrase(
-            moras=self.moras + accent_phrase.moras,
-            accent=self.accent,
-            is_interrogative=accent_phrase.is_interrogative,
-        )
-
 
 @dataclass
 class BreathGroup:

From 35b7158f8ad0bd521dda999f5c332569d92bb6bd Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Mon, 18 Dec 2023 07:16:15 +0900
Subject: [PATCH 039/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E3=83=86?=
 =?UTF-8?q?=E3=82=B9=E3=83=88=E7=94=A8=E3=83=A1=E3=82=BD=E3=83=83=E3=83=89?=
 =?UTF-8?q?=E5=88=87=E3=82=8A=E5=87=BA=E3=81=97=20(#891)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 test/test_full_context_label.py               | 42 ++++++++++--
 .../tts_pipeline/full_context_label.py        | 68 -------------------
 2 files changed, 35 insertions(+), 75 deletions(-)

diff --git a/test/test_full_context_label.py b/test/test_full_context_label.py
index 153c8b35f..87bf57785 100644
--- a/test/test_full_context_label.py
+++ b/test/test_full_context_label.py
@@ -11,6 +11,29 @@
 )
 
 
+def contexts_to_feature(contexts: dict[str, str]) -> str:
+    """ラベルの contexts を feature へ変換する"""
+    return (
+        "{p1}^{p2}-{p3}+{p4}={p5}"
+        "/A:{a1}+{a2}+{a3}"
+        "/B:{b1}-{b2}_{b3}"
+        "/C:{c1}_{c2}+{c3}"
+        "/D:{d1}+{d2}_{d3}"
+        "/E:{e1}_{e2}!{e3}_{e4}-{e5}"
+        "/F:{f1}_{f2}#{f3}_{f4}@{f5}_{f6}|{f7}_{f8}"
+        "/G:{g1}_{g2}%{g3}_{g4}_{g5}"
+        "/H:{h1}_{h2}"
+        "/I:{i1}-{i2}@{i3}+{i4}&{i5}-{i6}|{i7}+{i8}"
+        "/J:{j1}_{j2}"
+        "/K:{k1}+{k2}-{k3}"
+    ).format(**contexts)
+
+
+def features(ojt_container: Mora | AccentPhrase | BreathGroup | Utterance):
+    """コンテナインスタンスに直接的・間接的に含まれる全ての feature を返す"""
+    return [contexts_to_feature(p.contexts) for p in ojt_container.phonemes]
+
+
 class TestBasePhonemes(TestCase):
     def setUp(self):
         super().setUp()
@@ -140,7 +163,10 @@ def test_is_pause(self):
 
     def test_label(self) -> None:
         self.assertEqual(
-            [phoneme.label for phoneme in self.phonemes_hello_hiho],
+            [
+                contexts_to_feature(phoneme.contexts)
+                for phoneme in self.phonemes_hello_hiho
+            ],
             self.test_case_hello_hiho,
         )
 
@@ -189,7 +215,9 @@ def assert_phonemes(self, mora: Mora, mora_str: str) -> None:
         )
 
     def assert_labels(self, mora: Mora, label_start: int, label_end: int) -> None:
-        self.assertEqual(mora.labels, self.test_case_hello_hiho[label_start:label_end])
+        self.assertEqual(
+            features(mora), self.test_case_hello_hiho[label_start:label_end]
+        )
 
     def test_phonemes(self) -> None:
         self.assert_phonemes(self.mora_hello_1, "ko")
@@ -260,10 +288,10 @@ def test_phonemes(self):
 
     def test_labels(self):
         self.assertEqual(
-            self.accent_phrase_hello.labels, self.test_case_hello_hiho[1:10]
+            features(self.accent_phrase_hello), self.test_case_hello_hiho[1:10]
         )
         self.assertEqual(
-            self.accent_phrase_hiho.labels, self.test_case_hello_hiho[11:19]
+            features(self.accent_phrase_hiho), self.test_case_hello_hiho[11:19]
         )
 
 
@@ -299,10 +327,10 @@ def test_phonemes(self):
 
     def test_labels(self):
         self.assertEqual(
-            self.breath_group_hello.labels, self.test_case_hello_hiho[1:10]
+            features(self.breath_group_hello), self.test_case_hello_hiho[1:10]
         )
         self.assertEqual(
-            self.breath_group_hiho.labels, self.test_case_hello_hiho[11:19]
+            features(self.breath_group_hiho), self.test_case_hello_hiho[11:19]
         )
 
 
@@ -387,4 +415,4 @@ def test_phonemes(self):
                 )
 
     def test_labels(self):
-        self.assertEqual(self.utterance_hello_hiho.labels, self.test_case_hello_hiho)
+        self.assertEqual(features(self.utterance_hello_hiho), self.test_case_hello_hiho)
diff --git a/voicevox_engine/tts_pipeline/full_context_label.py b/voicevox_engine/tts_pipeline/full_context_label.py
index 50517f819..6c1204d7b 100644
--- a/voicevox_engine/tts_pipeline/full_context_label.py
+++ b/voicevox_engine/tts_pipeline/full_context_label.py
@@ -54,30 +54,6 @@ def from_label(cls, label: str):
         ).groupdict()
         return cls(contexts=contexts)
 
-    @property
-    def label(self):
-        """
-        pyopenjtalk.extract_fullcontextで得られるラベルと等しい
-        Returns
-        -------
-        lebel: str
-            ラベルを返す
-        """
-        return (
-            "{p1}^{p2}-{p3}+{p4}={p5}"
-            "/A:{a1}+{a2}+{a3}"
-            "/B:{b1}-{b2}_{b3}"
-            "/C:{c1}_{c2}+{c3}"
-            "/D:{d1}+{d2}_{d3}"
-            "/E:{e1}_{e2}!{e3}_{e4}-{e5}"
-            "/F:{f1}_{f2}#{f3}_{f4}@{f5}_{f6}|{f7}_{f8}"
-            "/G:{g1}_{g2}%{g3}_{g4}_{g5}"
-            "/H:{h1}_{h2}"
-            "/I:{i1}-{i2}@{i3}+{i4}&{i5}-{i6}|{i7}+{i8}"
-            "/J:{j1}_{j2}"
-            "/K:{k1}+{k2}-{k3}"
-        ).format(**self.contexts)
-
     @property
     def phoneme(self):
         """
@@ -149,17 +125,6 @@ def phonemes(self):
         else:
             return [self.vowel]
 
-    @property
-    def labels(self):
-        """
-        ラベル群を返す
-        Returns
-        -------
-        labels : list[str]
-            Moraに含まれるすべてのラベルを返す
-        """
-        return [p.label for p in self.phonemes]
-
 
 @dataclass
 class AccentPhrase:
@@ -256,17 +221,6 @@ def phonemes(self):
         """
         return list(chain.from_iterable(m.phonemes for m in self.moras))
 
-    @property
-    def labels(self):
-        """
-        ラベル群を返す
-        Returns
-        -------
-        labels : list[str]
-            AccentPhraseに間接的に含まれる全てのラベルを返す
-        """
-        return [p.label for p in self.phonemes]
-
 
 @dataclass
 class BreathGroup:
@@ -341,17 +295,6 @@ def phonemes(self):
             )
         )
 
-    @property
-    def labels(self):
-        """
-        ラベル群を返す
-        Returns
-        -------
-        labels : list[str]
-            BreathGroupに間接的に含まれる全てのラベルを返す
-        """
-        return [p.label for p in self.phonemes]
-
 
 @dataclass
 class Utterance:
@@ -494,17 +437,6 @@ def phonemes(self):
 
         return phonemes
 
-    @property
-    def labels(self):
-        """
-        ラベル群を返す
-        Returns
-        -------
-        labels : list[str]
-            Utteranceクラスに直接的・間接的に含まれる全てのラベルを返す
-        """
-        return [p.label for p in self.phonemes]
-
 
 def extract_full_context_label(text: str):
     """

From 216b40652e23b6510b0bd0285b1551f6db9cd1b3 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 20 Dec 2023 01:37:33 +0900
Subject: [PATCH 040/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E3=83=95?=
 =?UTF-8?q?=E3=83=AC=E3=83=BC=E3=83=A0=E6=95=B0=E3=82=AB=E3=82=A6=E3=83=B3?=
 =?UTF-8?q?=E3=83=88=E5=85=B1=E9=80=9A=E5=8C=96=20(#898)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 test/test_synthesis_engine.py              | 40 +++++----------
 voicevox_engine/tts_pipeline/tts_engine.py | 57 ++++++++++------------
 2 files changed, 36 insertions(+), 61 deletions(-)

diff --git a/test/test_synthesis_engine.py b/test/test_synthesis_engine.py
index 416a18feb..1f0d5016b 100644
--- a/test/test_synthesis_engine.py
+++ b/test/test_synthesis_engine.py
@@ -20,10 +20,9 @@
     apply_prepost_silence,
     apply_speed_scale,
     apply_volume_scale,
-    calc_frame_per_mora,
-    calc_frame_per_phoneme,
     calc_frame_phoneme,
     calc_frame_pitch,
+    count_frame_per_unit,
     mora_phoneme_list,
     pre_process,
     query_to_decoder_feature,
@@ -349,8 +348,8 @@ def test_apply_output_stereo():
     assert numpy.array_equal(wave, true_wave)
 
 
-def test_calc_frame_per_phoneme():
-    """Test `calc_frame_per_phoneme`."""
+def test_count_frame_per_unit():
+    """Test `count_frame_per_unit`."""
     # Inputs
     moras = [
         _gen_mora("　", None, None, "　", 2 * 0.01067, 0.0),  # 0.01067 [sec/frame]
@@ -366,35 +365,15 @@ def test_calc_frame_per_phoneme():
     #                        Pre k  o  N pau h  i  h  O Pst
     true_frame_per_phoneme = [2, 2, 4, 4, 2, 2, 4, 4, 2, 6]
     true_frame_per_phoneme = numpy.array(true_frame_per_phoneme, dtype=numpy.int32)
-
-    # Outputs
-    frame_per_phoneme = calc_frame_per_phoneme(moras)
-
-    assert numpy.array_equal(frame_per_phoneme, true_frame_per_phoneme)
-
-
-def test_calc_frame_per_mora():
-    """Test `calc_frame_per_mora`."""
-    # Inputs
-    moras = [
-        _gen_mora("　", None, None, "　", 2 * 0.01067, 0.0),  # 0.01067 [sec/frame]
-        _gen_mora("コ", "k", 2 * 0.01067, "o", 4 * 0.01067, 0.0),
-        _gen_mora("ン", None, None, "N", 4 * 0.01067, 0.0),
-        _gen_mora("、", None, None, "pau", 2 * 0.01067, 0.0),
-        _gen_mora("ヒ", "h", 2 * 0.01067, "i", 4 * 0.01067, 0.0),
-        _gen_mora("ホ", "h", 4 * 0.01067, "O", 2 * 0.01067, 0.0),
-        _gen_mora("　", None, None, "　", 6 * 0.01067, 0.0),
-    ]
-
-    # Expects
     #                    Pre ko  N pau hi hO Pst
     true_frame_per_mora = [2, 6, 4, 2, 6, 6, 6]
     true_frame_per_mora = numpy.array(true_frame_per_mora, dtype=numpy.int32)
 
     # Outputs
-    frame_per_phoneme = numpy.array(list(map(calc_frame_per_mora, moras)))
+    frame_per_phoneme, frame_per_mora = count_frame_per_unit(moras)
 
-    assert numpy.array_equal(frame_per_phoneme, true_frame_per_mora)
+    assert numpy.array_equal(frame_per_phoneme, true_frame_per_phoneme)
+    assert numpy.array_equal(frame_per_mora, true_frame_per_mora)
 
 
 def test_calc_frame_pitch():
@@ -409,8 +388,11 @@ def test_calc_frame_pitch():
         _gen_mora("ホ", "h", 2 * 0.01067, "O", 1 * 0.01067, 0.0),
         _gen_mora("　", None, None, "　", 3 * 0.01067, 0.0),
     ]
+    #               Pre ko  N pau hi hO Pst
+    frame_per_mora = [1, 3, 2, 1, 3, 3, 3]
+    frame_per_mora = numpy.array(frame_per_mora, dtype=numpy.int32)
 
-    #           pau   ko     ko     ko      N      N
+    #           pau   ko   ko    ko     N     N
     true1_f0 = [0.0, 50.0, 50.0, 50.0, 50.0, 50.0]
     #           pau   hi     hi     hi
     true2_f0 = [0.0, 125.0, 125.0, 125.0]
@@ -419,7 +401,7 @@ def test_calc_frame_pitch():
     true_f0 = numpy.array(true1_f0 + true2_f0 + true3_f0, dtype=numpy.float32)
 
     # Outputs
-    f0 = calc_frame_pitch(moras)
+    f0 = calc_frame_pitch(moras, frame_per_mora)
 
     assert numpy.array_equal(f0, true_f0)
 
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index bbd1d537e..67ebb9564 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -163,25 +163,36 @@ def apply_speed_scale(moras: list[Mora], query: AudioQuery) -> list[Mora]:
     return moras
 
 
-def calc_frame_per_phoneme(moras: List[Mora]):
+def count_frame_per_unit(moras: list[Mora]) -> tuple[ndarray, ndarray]:
     """
-    音素あたりのフレーム長を算出
+    音素あたり・モーラあたりのフレーム長を算出する
     Parameters
     ----------
-    moras : List[Mora]
-        モーラ列
+    moras : list[Mora]
+        モーラ系列
     Returns
     -------
-    frame_per_phoneme : NDArray[]
-        音素あたりのフレーム長。端数丸め。
+    frame_per_phoneme : ndarray
+        音素あたりのフレーム長。端数丸め。shape = (Phoneme,)
+    frame_per_mora : ndarray
+        モーラあたりのフレーム長。端数丸め。shape = (Mora,)
     """
     frame_per_phoneme: list[ndarray] = []
+    frame_per_mora: list[ndarray] = []
     for mora in moras:
+        vowel_frames = _to_frame(mora.vowel_length)
+        consonant_frames = _to_frame(mora.consonant_length) if mora.consonant else 0
+        mora_frames = vowel_frames + consonant_frames  # 音素ごとにフレーム長を算出し、和をモーラのフレーム長とする
+
         if mora.consonant:
-            frame_per_phoneme.append(_to_frame(mora.consonant_length))
-        frame_per_phoneme.append(_to_frame(mora.vowel_length))
+            frame_per_phoneme += [consonant_frames]
+        frame_per_phoneme += [vowel_frames]
+        frame_per_mora += [mora_frames]
+
     frame_per_phoneme = numpy.array(frame_per_phoneme)
-    return frame_per_phoneme
+    frame_per_mora = numpy.array(frame_per_mora)
+
+    return frame_per_phoneme, frame_per_mora
 
 
 def _to_frame(sec: float) -> ndarray:
@@ -190,24 +201,6 @@ def _to_frame(sec: float) -> ndarray:
     return numpy.round(sec * FRAMERATE).astype(numpy.int32)
 
 
-def calc_frame_per_mora(mora: Mora) -> ndarray:
-    """
-    モーラあたりのフレーム長を算出
-    Parameters
-    ----------
-    mora : Mora
-        モーラ
-    Returns
-    -------
-    frame_per_mora : NDArray[]
-        モーラあたりのフレーム長。端数丸め。
-    """
-    # 音素ごとにフレーム長を算出し、和をモーラのフレーム長とする
-    vowel_frames = _to_frame(mora.vowel_length)
-    consonant_frames = _to_frame(mora.consonant_length) if mora.consonant else 0
-    return vowel_frames + consonant_frames
-
-
 def apply_pitch_scale(moras: list[Mora], query: AudioQuery) -> list[Mora]:
     """
     音高スケール（`pitchScale`）の適用
@@ -250,13 +243,15 @@ def apply_intonation_scale(moras: list[Mora], query: AudioQuery) -> list[Mora]:
     return moras
 
 
-def calc_frame_pitch(moras: list[Mora]) -> ndarray:
+def calc_frame_pitch(moras: list[Mora], frame_per_mora: ndarray) -> ndarray:
     """
     フレームごとのピッチの生成
     Parameters
     ----------
     moras : List[Mora]
         モーラ列
+    frame_per_mora : ndarray
+        モーラあたりのフレーム長
     Returns
     -------
     frame_f0 : NDArray[]
@@ -267,8 +262,6 @@ def calc_frame_pitch(moras: list[Mora]) -> ndarray:
     f0 = numpy.array([mora.pitch for mora in moras], dtype=numpy.float32)
 
     # Rescale: 時間スケールの変更（モーラ -> フレーム）
-    # 母音インデックスに基づき "音素あたりのフレーム長" を "モーラあたりのフレーム長" に集約
-    frame_per_mora = numpy.array(list(map(calc_frame_per_mora, moras)))
     frame_f0 = numpy.repeat(f0, frame_per_mora)
     return frame_f0
 
@@ -382,8 +375,8 @@ def query_to_decoder_feature(query: AudioQuery) -> tuple[ndarray, ndarray]:
 
     phoneme_data_list = to_flatten_phonemes(flatten_moras)
 
-    frame_per_phoneme = calc_frame_per_phoneme(flatten_moras)
-    f0 = calc_frame_pitch(flatten_moras)
+    frame_per_phoneme, frame_per_mora = count_frame_per_unit(flatten_moras)
+    f0 = calc_frame_pitch(flatten_moras, frame_per_mora)
     phoneme = calc_frame_phoneme(phoneme_data_list, frame_per_phoneme)
 
     return phoneme, f0

From 92a05c1fa9b250758a6f48e4bebdf98b8cd1326f Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 20 Dec 2023 02:42:59 +0900
Subject: [PATCH 041/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`SynthesisEngine`?=
 =?UTF-8?q?=20=E2=86=92=20`TTSEngine`=20=E6=94=B9=E5=90=8D=20(#870)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 build_util/make_docs.py                          |  4 ++--
 run.py                                           |  6 +++---
 test/test_mock_synthesis_engine.py               |  6 +++---
 test/test_synthesis_engine.py                    |  8 ++++----
 test/test_synthesis_engine_base.py               |  6 +++---
 voicevox_engine/dev/core/mock.py                 |  2 +-
 voicevox_engine/dev/synthesis_engine/__init__.py |  4 ++--
 voicevox_engine/dev/synthesis_engine/mock.py     |  8 ++++----
 voicevox_engine/metas/MetasStore.py              |  8 ++++----
 voicevox_engine/morphing.py                      |  4 ++--
 voicevox_engine/tts_pipeline/__init__.py         |  8 ++++----
 voicevox_engine/tts_pipeline/make_tts_engines.py | 10 +++++-----
 voicevox_engine/tts_pipeline/tts_engine.py       |  4 ++--
 voicevox_engine/tts_pipeline/tts_engine_base.py  |  2 +-
 14 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/build_util/make_docs.py b/build_util/make_docs.py
index d21ba85b9..7bf1f0b6e 100644
--- a/build_util/make_docs.py
+++ b/build_util/make_docs.py
@@ -1,7 +1,7 @@
 import json
 
 from voicevox_engine.dev.core import mock as core
-from voicevox_engine.dev.synthesis_engine.mock import MockSynthesisEngine
+from voicevox_engine.dev.synthesis_engine.mock import MockTTSEngine
 from voicevox_engine.preset import PresetManager
 from voicevox_engine.setting import USER_SETTING_PATH, SettingLoader
 from voicevox_engine.utility import engine_root
@@ -10,7 +10,7 @@
     import run
 
     app = run.generate_app(
-        synthesis_engines={"mock": MockSynthesisEngine(speakers=core.metas())},
+        synthesis_engines={"mock": MockTTSEngine(speakers=core.metas())},
         latest_core_version="mock",
         setting_loader=SettingLoader(USER_SETTING_PATH),
         preset_manager=PresetManager(  # FIXME: impl MockPresetManager
diff --git a/run.py b/run.py
index bfb8cba14..0e7a33cba 100644
--- a/run.py
+++ b/run.py
@@ -65,7 +65,7 @@
     Setting,
     SettingLoader,
 )
-from voicevox_engine.tts_pipeline import SynthesisEngineBase, make_synthesis_engines
+from voicevox_engine.tts_pipeline import TTSEngineBase, make_synthesis_engines
 from voicevox_engine.tts_pipeline.kana_parser import create_kana, parse_kana
 from voicevox_engine.user_dict import (
     apply_word,
@@ -131,7 +131,7 @@ def set_output_log_utf8() -> None:
 
 
 def generate_app(
-    synthesis_engines: Dict[str, SynthesisEngineBase],
+    synthesis_engines: Dict[str, TTSEngineBase],
     latest_core_version: str,
     setting_loader: SettingLoader,
     preset_manager: PresetManager,
@@ -227,7 +227,7 @@ async def block_origin_middleware(request: Request, call_next):
     def apply_user_dict():
         update_dict()
 
-    def get_engine(core_version: Optional[str]) -> SynthesisEngineBase:
+    def get_engine(core_version: Optional[str]) -> TTSEngineBase:
         if core_version is None:
             return synthesis_engines[latest_core_version]
         if core_version in synthesis_engines:
diff --git a/test/test_mock_synthesis_engine.py b/test/test_mock_synthesis_engine.py
index 27fee31c1..e9cf71688 100644
--- a/test/test_mock_synthesis_engine.py
+++ b/test/test_mock_synthesis_engine.py
@@ -1,11 +1,11 @@
 from unittest import TestCase
 
-from voicevox_engine.dev.synthesis_engine import MockSynthesisEngine
+from voicevox_engine.dev.synthesis_engine import MockTTSEngine
 from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
 from voicevox_engine.tts_pipeline.kana_parser import create_kana
 
 
-class TestMockSynthesisEngine(TestCase):
+class TestMockTTSEngine(TestCase):
     def setUp(self):
         super().setUp()
 
@@ -102,7 +102,7 @@ def setUp(self):
                 pause_mora=None,
             ),
         ]
-        self.engine = MockSynthesisEngine(speakers="", supported_devices="")
+        self.engine = MockTTSEngine(speakers="", supported_devices="")
 
     def test_replace_phoneme_length(self):
         self.assertEqual(
diff --git a/test/test_synthesis_engine.py b/test/test_synthesis_engine.py
index 1f0d5016b..24136abe1 100644
--- a/test/test_synthesis_engine.py
+++ b/test/test_synthesis_engine.py
@@ -8,7 +8,7 @@
 import numpy
 
 from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
-from voicevox_engine.tts_pipeline import SynthesisEngine
+from voicevox_engine.tts_pipeline import TTSEngine
 from voicevox_engine.tts_pipeline.acoustic_feature_extractor import OjtPhoneme
 
 # TODO: import from voicevox_engine.synthesis_engine.mora
@@ -488,7 +488,7 @@ def test_query_to_decoder_feature():
     assert numpy.array_equal(f0, true_f0)
 
 
-class TestSynthesisEngine(TestCase):
+class TestTTSEngine(TestCase):
     def setUp(self):
         super().setUp()
         self.str_list_hello_hiho = (
@@ -525,7 +525,7 @@ def setUp(self):
         self.yukarin_s_mock = core.yukarin_s_forward
         self.yukarin_sa_mock = core.yukarin_sa_forward
         self.decode_mock = core.decode_forward
-        self.synthesis_engine = SynthesisEngine(core=core)
+        self.synthesis_engine = TTSEngine(core=core)
 
     def test_to_flatten_moras(self):
         flatten_moras = to_flatten_moras(self.accent_phrases_hello_hiho)
@@ -772,7 +772,7 @@ def synthesis_test_base(self, audio_query: AudioQuery):
         for i in range(len(phoneme_length_list)):
             phoneme_length_list[i] /= audio_query.speedScale
 
-        # Outputs: MockCore入りSynthesisEngine の `.synthesis` 出力および core.decode_forward 引数
+        # Outputs: MockCore入りTTSEngine の `.synthesis` 出力および core.decode_forward 引数
         result = self.synthesis_engine.synthesis(query=audio_query, style_id=1)
         decode_args = self.decode_mock.call_args[1]
         list_length = decode_args["length"]
diff --git a/test/test_synthesis_engine_base.py b/test/test_synthesis_engine_base.py
index ecee4df66..bc6d88f2c 100644
--- a/test/test_synthesis_engine_base.py
+++ b/test/test_synthesis_engine_base.py
@@ -5,7 +5,7 @@
 import numpy
 
 from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
-from voicevox_engine.tts_pipeline import SynthesisEngine
+from voicevox_engine.tts_pipeline import TTSEngine
 
 
 def yukarin_s_mock(length: int, phoneme_list: numpy.ndarray, style_id: numpy.ndarray):
@@ -184,10 +184,10 @@ def is_model_loaded(self, style_id):
         return True
 
 
-class TestSynthesisEngineBase(TestCase):
+class TestTTSEngineBase(TestCase):
     def setUp(self):
         super().setUp()
-        self.synthesis_engine = SynthesisEngine(
+        self.synthesis_engine = TTSEngine(
             core=MockCore(),
         )
         self.synthesis_engine._synthesis_impl = Mock()
diff --git a/voicevox_engine/dev/core/mock.py b/voicevox_engine/dev/core/mock.py
index 2bc2102f0..c0531fbc8 100644
--- a/voicevox_engine/dev/core/mock.py
+++ b/voicevox_engine/dev/core/mock.py
@@ -32,7 +32,7 @@ def yukarin_sa_forward(length: int, **kwargs: Dict[str, Any]) -> np.ndarray:
 def decode_forward(length: int, **kwargs: Dict[str, Any]) -> np.ndarray:
     """
     合成音声の波形データをNumPy配列で返します。ただし、常に固定の文言を読み上げます（DUMMY_TEXT）
-    参照→SynthesisEngine のdocstring [Mock]
+    参照→TTSEngine のdocstring [Mock]
 
     Parameters
     ----------
diff --git a/voicevox_engine/dev/synthesis_engine/__init__.py b/voicevox_engine/dev/synthesis_engine/__init__.py
index e7b2ac5b1..ae0b29ec2 100644
--- a/voicevox_engine/dev/synthesis_engine/__init__.py
+++ b/voicevox_engine/dev/synthesis_engine/__init__.py
@@ -1,3 +1,3 @@
-from .mock import MockSynthesisEngine
+from .mock import MockTTSEngine
 
-__all__ = ["MockSynthesisEngine"]
+__all__ = ["MockTTSEngine"]
diff --git a/voicevox_engine/dev/synthesis_engine/mock.py b/voicevox_engine/dev/synthesis_engine/mock.py
index 3cb72dc79..b861dc7c9 100644
--- a/voicevox_engine/dev/synthesis_engine/mock.py
+++ b/voicevox_engine/dev/synthesis_engine/mock.py
@@ -6,13 +6,13 @@
 from soxr import resample
 
 from ...model import AccentPhrase, AudioQuery
-from ...tts_pipeline import SynthesisEngineBase
+from ...tts_pipeline import TTSEngineBase
 from ...tts_pipeline.tts_engine import to_flatten_moras
 
 
-class MockSynthesisEngine(SynthesisEngineBase):
+class MockTTSEngine(TTSEngineBase):
     """
-    SynthesisEngine [Mock]
+    TTSEngine [Mock]
     """
 
     def __init__(
@@ -110,7 +110,7 @@ def _synthesis_impl(self, query: AudioQuery, style_id: int) -> np.ndarray:
     def forward(self, text: str, **kwargs: Dict[str, Any]) -> np.ndarray:
         """
         forward tts via pyopenjtalk.tts()
-        参照→SynthesisEngine のdocstring [Mock]
+        参照→TTSEngine のdocstring [Mock]
 
         Parameters
         ----------
diff --git a/voicevox_engine/metas/MetasStore.py b/voicevox_engine/metas/MetasStore.py
index 78f838a2a..76eceff8a 100644
--- a/voicevox_engine/metas/MetasStore.py
+++ b/voicevox_engine/metas/MetasStore.py
@@ -5,7 +5,7 @@
 from voicevox_engine.metas.Metas import CoreSpeaker, EngineSpeaker, Speaker, StyleInfo
 
 if TYPE_CHECKING:
-    from voicevox_engine.tts_pipeline.tts_engine_base import SynthesisEngineBase
+    from voicevox_engine.tts_pipeline.tts_engine_base import TTSEngineBase
 
 
 class MetasStore:
@@ -29,13 +29,13 @@ def __init__(self, engine_speakers_path: Path) -> None:
         }
 
     # FIXME: engineではなくList[CoreSpeaker]を渡す形にすることで
-    # SynthesisEngineBaseによる循環importを修正する
-    def load_combined_metas(self, engine: "SynthesisEngineBase") -> List[Speaker]:
+    # TTSEngineBaseによる循環importを修正する
+    def load_combined_metas(self, engine: "TTSEngineBase") -> List[Speaker]:
         """
         コアに含まれる話者メタ情報とエンジンに含まれる話者メタ情報を統合
         Parameters
         ----------
-        engine : SynthesisEngineBase
+        engine : TTSEngineBase
             コアに含まれる話者メタ情報をもったエンジン
         Returns
         -------
diff --git a/voicevox_engine/morphing.py b/voicevox_engine/morphing.py
index 89a2498c3..ee7bf446d 100644
--- a/voicevox_engine/morphing.py
+++ b/voicevox_engine/morphing.py
@@ -10,7 +10,7 @@
 from .metas.Metas import Speaker, SpeakerSupportPermittedSynthesisMorphing, StyleInfo
 from .metas.MetasStore import construct_lookup
 from .model import AudioQuery, MorphableTargetInfo, StyleIdNotFoundError
-from .tts_pipeline import SynthesisEngine
+from .tts_pipeline import TTSEngine
 
 
 # FIXME: ndarray type hint, https://github.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder/blob/2b64f86197573497c685c785c6e0e743f407b63e/pyworld/pyworld.pyx#L398  # noqa
@@ -128,7 +128,7 @@ def is_synthesis_morphing_permitted(
 
 
 def synthesis_morphing_parameter(
-    engine: SynthesisEngine,
+    engine: TTSEngine,
     query: AudioQuery,
     base_speaker: int,
     target_speaker: int,
diff --git a/voicevox_engine/tts_pipeline/__init__.py b/voicevox_engine/tts_pipeline/__init__.py
index 2fce842ba..8aeea1b06 100644
--- a/voicevox_engine/tts_pipeline/__init__.py
+++ b/voicevox_engine/tts_pipeline/__init__.py
@@ -1,12 +1,12 @@
 from ..core_wrapper import CoreWrapper, load_runtime_lib
 from .make_tts_engines import make_synthesis_engines
-from .tts_engine import SynthesisEngine
-from .tts_engine_base import SynthesisEngineBase
+from .tts_engine import TTSEngine
+from .tts_engine_base import TTSEngineBase
 
 __all__ = [
     "CoreWrapper",
     "load_runtime_lib",
     "make_synthesis_engines",
-    "SynthesisEngine",
-    "SynthesisEngineBase",
+    "TTSEngine",
+    "TTSEngineBase",
 ]
diff --git a/voicevox_engine/tts_pipeline/make_tts_engines.py b/voicevox_engine/tts_pipeline/make_tts_engines.py
index 09183574a..8ee63c907 100644
--- a/voicevox_engine/tts_pipeline/make_tts_engines.py
+++ b/voicevox_engine/tts_pipeline/make_tts_engines.py
@@ -5,7 +5,7 @@
 
 from ..core_wrapper import CoreWrapper, load_runtime_lib
 from ..utility import engine_root, get_save_dir
-from .tts_engine import SynthesisEngine, SynthesisEngineBase
+from .tts_engine import TTSEngine, TTSEngineBase
 
 
 def make_synthesis_engines(
@@ -16,7 +16,7 @@ def make_synthesis_engines(
     cpu_num_threads: Optional[int] = None,
     enable_mock: bool = True,
     load_all_models: bool = False,
-) -> Dict[str, SynthesisEngineBase]:
+) -> Dict[str, TTSEngineBase]:
     """
     音声ライブラリをロードして、音声合成エンジンを生成
 
@@ -88,7 +88,7 @@ def load_core_library(core_dir: Path, suppress_error: bool = False):
                         file=sys.stderr,
                     )
                 else:
-                    synthesis_engines[core_version] = SynthesisEngine(core=core)
+                    synthesis_engines[core_version] = TTSEngine(core=core)
             except Exception:
                 if not suppress_error:
                     raise
@@ -113,11 +113,11 @@ def load_core_library(core_dir: Path, suppress_error: bool = False):
         # モック追加
         from ..dev.core import metas as mock_metas
         from ..dev.core import supported_devices as mock_supported_devices
-        from ..dev.synthesis_engine import MockSynthesisEngine
+        from ..dev.synthesis_engine import MockTTSEngine
 
         if "0.0.0" not in synthesis_engines:
             print("Info: Loading mock.")
-            synthesis_engines["0.0.0"] = MockSynthesisEngine(
+            synthesis_engines["0.0.0"] = MockTTSEngine(
                 speakers=mock_metas(), supported_devices=mock_supported_devices()
             )
 
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index 67ebb9564..519c2836b 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -9,7 +9,7 @@
 from ..core_wrapper import CoreWrapper, OldCoreError
 from ..model import AccentPhrase, AudioQuery, Mora
 from .acoustic_feature_extractor import OjtPhoneme
-from .tts_engine_base import SynthesisEngineBase
+from .tts_engine_base import TTSEngineBase
 
 unvoiced_mora_phoneme_list = ["A", "I", "U", "E", "O", "cl", "pau"]
 mora_phoneme_list = ["a", "i", "u", "e", "o", "N"] + unvoiced_mora_phoneme_list
@@ -390,7 +390,7 @@ def raw_wave_to_output_wave(query: AudioQuery, wave: ndarray, sr_wave: int) -> n
     return wave
 
 
-class SynthesisEngine(SynthesisEngineBase):
+class TTSEngine(TTSEngineBase):
     """音声合成器（core）の管理/実行/プロキシと音声合成フロー"""
 
     def __init__(self, core: CoreWrapper):
diff --git a/voicevox_engine/tts_pipeline/tts_engine_base.py b/voicevox_engine/tts_pipeline/tts_engine_base.py
index 3a846905e..7ffca18a5 100644
--- a/voicevox_engine/tts_pipeline/tts_engine_base.py
+++ b/voicevox_engine/tts_pipeline/tts_engine_base.py
@@ -172,7 +172,7 @@ def test_to_accent_phrases(text: str) -> list[AccentPhrase]:
     return utterance_to_accent_phrases(utterance)
 
 
-class SynthesisEngineBase(metaclass=ABCMeta):
+class TTSEngineBase(metaclass=ABCMeta):
     @property
     @abstractmethod
     def default_sampling_rate(self) -> int:

From 53f8c540a93a5eece40a779b01877f29beb77d6c Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 20 Dec 2023 03:15:57 +0900
Subject: [PATCH 042/177] =?UTF-8?q?=E8=BF=BD=E5=8A=A0:=20=E6=B3=A2?=
 =?UTF-8?q?=E5=BD=A2=E5=90=88=E6=88=90=E5=BE=8C=E5=87=A6=E7=90=86=E3=81=AE?=
 =?UTF-8?q?=E3=83=86=E3=82=B9=E3=83=88=20(#902)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 test/test_synthesis_engine.py | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/test/test_synthesis_engine.py b/test/test_synthesis_engine.py
index 24136abe1..f0f62427a 100644
--- a/test/test_synthesis_engine.py
+++ b/test/test_synthesis_engine.py
@@ -26,6 +26,7 @@
     mora_phoneme_list,
     pre_process,
     query_to_decoder_feature,
+    raw_wave_to_output_wave,
     split_mora,
     to_flatten_moras,
     to_flatten_phonemes,
@@ -488,6 +489,38 @@ def test_query_to_decoder_feature():
     assert numpy.array_equal(f0, true_f0)
 
 
+def test_raw_wave_to_output_wave_with_resample():
+    """Test `raw_wave_to_output_wave` with resampling option."""
+    # Inputs
+    query = _gen_query(volumeScale=2, outputSamplingRate=48000, outputStereo=True)
+    raw_wave = numpy.random.rand(240)
+    sr_raw_wave = 24000
+
+    # Expects
+    true_wave_shape = (480, 2)
+
+    # Outputs
+    wave = raw_wave_to_output_wave(query, raw_wave, sr_raw_wave)
+
+    assert wave.shape == true_wave_shape
+
+
+def test_raw_wave_to_output_wave_without_resample():
+    """Test `raw_wave_to_output_wave`  without resampling option."""
+    # Inputs
+    query = _gen_query(volumeScale=2, outputStereo=True)
+    raw_wave = numpy.random.rand(240)
+    sr_raw_wave = 24000
+
+    # Expects
+    true_wave = numpy.array([2 * raw_wave, 2 * raw_wave]).T
+
+    # Outputs
+    wave = raw_wave_to_output_wave(query, raw_wave, sr_raw_wave)
+
+    assert numpy.allclose(wave, true_wave)
+
+
 class TestTTSEngine(TestCase):
     def setUp(self):
         super().setUp()

From d8488ff16e0fdf164621665a26ef410c73c908ed Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 20 Dec 2023 04:08:13 +0900
Subject: [PATCH 043/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E7=B0=A1?=
 =?UTF-8?q?=E6=98=93=20docstring=20=E3=81=A8=E5=8D=98=E7=B4=94=E5=A4=89?=
 =?UTF-8?q?=E6=95=B0=E5=90=8D=20(#903)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 voicevox_engine/tts_pipeline/tts_engine.py | 119 +++------------------
 1 file changed, 16 insertions(+), 103 deletions(-)

diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index 519c2836b..acf6e8c5a 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -123,19 +123,7 @@ def generate_silence_mora(length: float) -> Mora:
 
 
 def apply_prepost_silence(moras: list[Mora], query: AudioQuery) -> list[Mora]:
-    """
-    前後無音（`prePhonemeLength` & `postPhonemeLength`）の適用
-    Parameters
-    ----------
-    moras : List[Mora]
-        モーラ時系列
-    query : AudioQuery
-        音声合成用のクエリ
-    Returns
-    -------
-    moras : List[Mora]
-        前後無音が付加されたモーラ時系列
-    """
+    """モーラ系列へ音声合成用のクエリがもつ前後無音（`prePhonemeLength` & `postPhonemeLength`）を付加する"""
     pre_silence_moras = [generate_silence_mora(query.prePhonemeLength)]
     post_silence_moras = [generate_silence_mora(query.postPhonemeLength)]
     moras = pre_silence_moras + moras + post_silence_moras
@@ -143,19 +131,7 @@ def apply_prepost_silence(moras: list[Mora], query: AudioQuery) -> list[Mora]:
 
 
 def apply_speed_scale(moras: list[Mora], query: AudioQuery) -> list[Mora]:
-    """
-    話速スケール（`speedScale`）の適用
-    Parameters
-    ----------
-    moras : list[Mora]
-        モーラ系列
-    query : AudioQuery
-        音声合成用のクエリ
-    Returns
-    -------
-    moras : list[Mora]
-        話速スケールが適用されたモーラ系列
-    """
+    """モーラ系列へ音声合成用のクエリがもつ話速スケール（`speedScale`）を適用する"""
     for mora in moras:
         mora.vowel_length /= query.speedScale
         if mora.consonant_length:
@@ -202,38 +178,14 @@ def _to_frame(sec: float) -> ndarray:
 
 
 def apply_pitch_scale(moras: list[Mora], query: AudioQuery) -> list[Mora]:
-    """
-    音高スケール（`pitchScale`）の適用
-    Parameters
-    ----------
-    moras : list[Mora]
-        モーラ系列
-    query : AudioQuery
-        音声合成用のクエリ
-    Returns
-    -------
-    moras : list[Mora]
-        音高スケールが適用されたモーラ系列
-    """
+    """モーラ系列へ音声合成用のクエリがもつ音高スケール（`pitchScale`）を適用する"""
     for mora in moras:
         mora.pitch *= 2**query.pitchScale
     return moras
 
 
 def apply_intonation_scale(moras: list[Mora], query: AudioQuery) -> list[Mora]:
-    """
-    抑揚スケール（`intonationScale`）の適用
-    Parameters
-    ----------
-    moras : list[Mora]
-        モーラ系列
-    query : AudioQuery
-        音声合成用のクエリ
-    Returns
-    -------
-    moras : list[Mora]
-        抑揚スケールが適用されたモーラ系列
-    """
+    """モーラ系列へ音声合成用のクエリがもつ抑揚スケール（`intonationScale`）を適用する"""
     # 有声音素 (f0>0) の平均値に対する乖離度をスケール
     voiced = list(filter(lambda mora: mora.pitch > 0, moras))
     mean_f0 = numpy.mean(list(map(lambda mora: mora.pitch, voiced))).item()
@@ -267,19 +219,7 @@ def calc_frame_pitch(moras: list[Mora], frame_per_mora: ndarray) -> ndarray:
 
 
 def apply_volume_scale(wave: numpy.ndarray, query: AudioQuery) -> numpy.ndarray:
-    """
-    音量スケール（`volumeScale`）の適用
-    Parameters
-    ----------
-    wave : numpy.ndarray
-        音声波形
-    query : AudioQuery
-        音声合成用のクエリ
-    Returns
-    -------
-    wave : numpy.ndarray
-        音量スケールが適用された音声波形
-    """
+    """音声波形へ音声合成用のクエリがもつ音量スケール（`volumeScale`）を適用する"""
     wave *= query.volumeScale
     return wave
 
@@ -310,43 +250,16 @@ def calc_frame_phoneme(phonemes: List[OjtPhoneme], frame_per_phoneme: numpy.ndar
 def apply_output_sampling_rate(
     wave: ndarray, sr_wave: int, query: AudioQuery
 ) -> ndarray:
-    """
-    出力サンプリングレート（`outputSamplingRate`）の適用
-    Parameters
-    ----------
-    wave : ndarray
-        音声波形
-    sr_wave : int
-        `wave`のサンプリングレート
-    query : AudioQuery
-        音声合成用のクエリ
-    Returns
-    -------
-    wave : ndarray
-        出力サンプリングレートが適用された音声波形
-    """
+    """音声波形へ音声合成用のクエリがもつ出力サンプリングレート（`outputSamplingRate`）を適用する"""
     # サンプリングレート一致のときはスルー
     if sr_wave == query.outputSamplingRate:
         return wave
-
     wave = resample(wave, sr_wave, query.outputSamplingRate)
     return wave
 
 
 def apply_output_stereo(wave: ndarray, query: AudioQuery) -> ndarray:
-    """
-    ステレオ出力（`outputStereo`）の適用
-    Parameters
-    ----------
-    wave : ndarray
-        音声波形
-    query : AudioQuery
-        音声合成用のクエリ
-    Returns
-    -------
-    wave : ndarray
-        ステレオ出力設定が適用された音声波形
-    """
+    """音声波形へ音声合成用のクエリがもつステレオ出力設定（`outputStereo`）を適用する"""
     if query.outputStereo:
         wave = numpy.array([wave, wave]).T
     return wave
@@ -366,18 +279,18 @@ def query_to_decoder_feature(query: AudioQuery) -> tuple[ndarray, ndarray]:
     f0 : ndarray
         フレームごとの基本周波数、shape=(Frame,)
     """
-    flatten_moras = to_flatten_moras(query.accent_phrases)
+    moras = to_flatten_moras(query.accent_phrases)
 
-    flatten_moras = apply_prepost_silence(flatten_moras, query)
-    flatten_moras = apply_speed_scale(flatten_moras, query)
-    flatten_moras = apply_pitch_scale(flatten_moras, query)
-    flatten_moras = apply_intonation_scale(flatten_moras, query)
+    moras = apply_prepost_silence(moras, query)
+    moras = apply_speed_scale(moras, query)
+    moras = apply_pitch_scale(moras, query)
+    moras = apply_intonation_scale(moras, query)
 
-    phoneme_data_list = to_flatten_phonemes(flatten_moras)
+    phonemes = to_flatten_phonemes(moras)
 
-    frame_per_phoneme, frame_per_mora = count_frame_per_unit(flatten_moras)
-    f0 = calc_frame_pitch(flatten_moras, frame_per_mora)
-    phoneme = calc_frame_phoneme(phoneme_data_list, frame_per_phoneme)
+    frame_per_phoneme, frame_per_mora = count_frame_per_unit(moras)
+    f0 = calc_frame_pitch(moras, frame_per_mora)
+    phoneme = calc_frame_phoneme(phonemes, frame_per_phoneme)
 
     return phoneme, f0
 

From 1639300b896d94abf80a44e5039971763c9de788 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 20 Dec 2023 17:02:37 +0900
Subject: [PATCH 044/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E6=B3=A2?=
 =?UTF-8?q?=E5=BD=A2=E5=90=88=E6=88=90=E5=89=8D=E5=87=A6=E7=90=86=E3=81=AE?=
 =?UTF-8?q?=E9=96=A2=E6=95=B0=E3=83=8D=E3=82=B9=E3=83=88=E5=BB=83=E6=AD=A2?=
 =?UTF-8?q?=20(#907)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Refactor: 関数ネストの廃止

* Update voicevox_engine/tts_pipeline/tts_engine.py

---------

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 test/test_synthesis_engine.py              | 55 -----------------
 voicevox_engine/tts_pipeline/tts_engine.py | 70 +++-------------------
 2 files changed, 8 insertions(+), 117 deletions(-)

diff --git a/test/test_synthesis_engine.py b/test/test_synthesis_engine.py
index f0f62427a..ed6345674 100644
--- a/test/test_synthesis_engine.py
+++ b/test/test_synthesis_engine.py
@@ -20,8 +20,6 @@
     apply_prepost_silence,
     apply_speed_scale,
     apply_volume_scale,
-    calc_frame_phoneme,
-    calc_frame_pitch,
     count_frame_per_unit,
     mora_phoneme_list,
     pre_process,
@@ -377,59 +375,6 @@ def test_count_frame_per_unit():
     assert numpy.array_equal(frame_per_mora, true_frame_per_mora)
 
 
-def test_calc_frame_pitch():
-    """Test `test_calc_frame_pitch`."""
-    # Inputs
-    moras = [
-        _gen_mora("　", None, None, "　", 1 * 0.01067, 0.0),
-        _gen_mora("コ", "k", 1 * 0.01067, "o", 2 * 0.01067, 50.0),
-        _gen_mora("ン", None, None, "N", 2 * 0.01067, 50.0),
-        _gen_mora("、", None, None, "pau", 1 * 0.01067, 0.0),
-        _gen_mora("ヒ", "h", 1 * 0.01067, "i", 2 * 0.01067, 125.0),
-        _gen_mora("ホ", "h", 2 * 0.01067, "O", 1 * 0.01067, 0.0),
-        _gen_mora("　", None, None, "　", 3 * 0.01067, 0.0),
-    ]
-    #               Pre ko  N pau hi hO Pst
-    frame_per_mora = [1, 3, 2, 1, 3, 3, 3]
-    frame_per_mora = numpy.array(frame_per_mora, dtype=numpy.int32)
-
-    #           pau   ko   ko    ko     N     N
-    true1_f0 = [0.0, 50.0, 50.0, 50.0, 50.0, 50.0]
-    #           pau   hi     hi     hi
-    true2_f0 = [0.0, 125.0, 125.0, 125.0]
-    #           hO   hO   hO   paw  paw  paw
-    true3_f0 = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
-    true_f0 = numpy.array(true1_f0 + true2_f0 + true3_f0, dtype=numpy.float32)
-
-    # Outputs
-    f0 = calc_frame_pitch(moras, frame_per_mora)
-
-    assert numpy.array_equal(f0, true_f0)
-
-
-def test_calc_frame_phoneme():
-    """Test `calc_frame_phoneme`."""
-    # Inputs
-    phoneme_str = "pau k o N pau h i h O pau"
-    phonemes = [OjtPhoneme(p) for p in phoneme_str.split()]
-    #                   Pre k  o  N pau h  i  h  O Pst
-    frame_per_phoneme = [1, 1, 2, 2, 1, 1, 2, 2, 1, 3]
-    n_frame = sum(frame_per_phoneme)
-    frame_per_phoneme = numpy.array(frame_per_phoneme, dtype=numpy.int32)
-
-    # Expects
-    #              Pr  k   o   o  N  N pau  h   i   i   h   h  O Pt Pt Pt
-    phoneme_ids = [0, 23, 30, 30, 4, 4, 0, 19, 21, 21, 19, 19, 5, 0, 0, 0]
-    true_frame_phoneme = numpy.zeros([n_frame, TRUE_NUM_PHONEME], dtype=numpy.float32)
-    for frame_idx, phoneme_idx in enumerate(phoneme_ids):
-        true_frame_phoneme[frame_idx, phoneme_idx] = 1.0
-
-    # Outputs
-    frame_phoneme = calc_frame_phoneme(phonemes, frame_per_phoneme)
-
-    assert numpy.array_equal(frame_phoneme, true_frame_phoneme)
-
-
 def test_query_to_decoder_feature():
     """Test `query_to_decoder_feature`."""
     # Inputs
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index acf6e8c5a..803f721e5 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -195,58 +195,12 @@ def apply_intonation_scale(moras: list[Mora], query: AudioQuery) -> list[Mora]:
     return moras
 
 
-def calc_frame_pitch(moras: list[Mora], frame_per_mora: ndarray) -> ndarray:
-    """
-    フレームごとのピッチの生成
-    Parameters
-    ----------
-    moras : List[Mora]
-        モーラ列
-    frame_per_mora : ndarray
-        モーラあたりのフレーム長
-    Returns
-    -------
-    frame_f0 : NDArray[]
-        フレームごとの基本周波数系列
-    """
-    # TODO: Better function name (c.f. VOICEVOX/voicevox_engine#790)
-    # モーラごとの基本周波数
-    f0 = numpy.array([mora.pitch for mora in moras], dtype=numpy.float32)
-
-    # Rescale: 時間スケールの変更（モーラ -> フレーム）
-    frame_f0 = numpy.repeat(f0, frame_per_mora)
-    return frame_f0
-
-
 def apply_volume_scale(wave: numpy.ndarray, query: AudioQuery) -> numpy.ndarray:
     """音声波形へ音声合成用のクエリがもつ音量スケール（`volumeScale`）を適用する"""
     wave *= query.volumeScale
     return wave
 
 
-def calc_frame_phoneme(phonemes: List[OjtPhoneme], frame_per_phoneme: numpy.ndarray):
-    """
-    フレームごとの音素列の生成（onehot化 + フレーム化）
-    Parameters
-    ----------
-    phonemes : List[OjtPhoneme]
-        音素列
-    frame_per_phoneme: NDArray
-        音素あたりのフレーム長。端数丸め。
-    Returns
-    -------
-    frame_phoneme : NDArray[]
-        フレームごとの音素系列
-    """
-    # TODO: Better function name (c.f. VOICEVOX/voicevox_engine#790)
-    # Convert: Core入力形式への変換（onehotベクトル系列）
-    onehot_phoneme = numpy.stack([p.onehot for p in phonemes])
-
-    # Rescale: 時間スケールの変更（音素 -> フレーム）
-    frame_phoneme = numpy.repeat(onehot_phoneme, frame_per_phoneme, axis=0)
-    return frame_phoneme
-
-
 def apply_output_sampling_rate(
     wave: ndarray, sr_wave: int, query: AudioQuery
 ) -> ndarray:
@@ -266,31 +220,23 @@ def apply_output_stereo(wave: ndarray, query: AudioQuery) -> ndarray:
 
 
 def query_to_decoder_feature(query: AudioQuery) -> tuple[ndarray, ndarray]:
-    """
-    音声合成用のクエリをデコーダー用特徴量へ変換する。
-    Parameters
-    ----------
-    query : AudioQuery
-        音声合成クエリ
-    Returns
-    -------
-    phoneme : ndarray
-        フレームごとの音素、shape=(Frame,)
-    f0 : ndarray
-        フレームごとの基本周波数、shape=(Frame,)
-    """
+    """音声合成用のクエリからフレームごとの音素 (shape=(フレーム長, 音素数)) と音高 (shape=(フレーム長,)) を得る"""
     moras = to_flatten_moras(query.accent_phrases)
 
+    # 設定を適用する
     moras = apply_prepost_silence(moras, query)
     moras = apply_speed_scale(moras, query)
     moras = apply_pitch_scale(moras, query)
     moras = apply_intonation_scale(moras, query)
 
-    phonemes = to_flatten_phonemes(moras)
+    # 表現を変更する（音素クラス → 音素 onehot ベクトル、モーラクラス → 音高スカラ）
+    phoneme = numpy.stack([p.onehot for p in to_flatten_phonemes(moras)])
+    f0 = numpy.array([mora.pitch for mora in moras], dtype=numpy.float32)
 
+    # 時間スケールを変更する（音素・モーラ → フレーム）
     frame_per_phoneme, frame_per_mora = count_frame_per_unit(moras)
-    f0 = calc_frame_pitch(moras, frame_per_mora)
-    phoneme = calc_frame_phoneme(phonemes, frame_per_phoneme)
+    phoneme = numpy.repeat(phoneme, frame_per_phoneme, axis=0)
+    f0 = numpy.repeat(f0, frame_per_mora)
 
     return phoneme, f0
 

From 831d28aad743a7583e8260cfc72da53f205fa966 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 20 Dec 2023 20:34:13 +0900
Subject: [PATCH 045/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20OpenJTalk=20`Phon?=
 =?UTF-8?q?eme`=20=E2=86=92=20`Label`=20=E3=83=AA=E3=83=8D=E3=83=BC?=
 =?UTF-8?q?=E3=83=A0=20(#893)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 test/test_full_context_label.py               |  16 +-
 .../tts_pipeline/full_context_label.py        | 206 ++++++++----------
 2 files changed, 96 insertions(+), 126 deletions(-)

diff --git a/test/test_full_context_label.py b/test/test_full_context_label.py
index 87bf57785..77acad8b9 100644
--- a/test/test_full_context_label.py
+++ b/test/test_full_context_label.py
@@ -5,8 +5,8 @@
 from voicevox_engine.tts_pipeline.full_context_label import (
     AccentPhrase,
     BreathGroup,
+    Label,
     Mora,
-    Phoneme,
     Utterance,
 )
 
@@ -123,7 +123,7 @@ def setUp(self):
             + "@xx+xx&xx-xx|xx+xx/J:xx_xx/K:2+2-9",
         ]
         self.phonemes_hello_hiho = [
-            Phoneme.from_label(label) for label in self.test_case_hello_hiho
+            Label.from_feature(feature) for feature in self.test_case_hello_hiho
         ]
 
 
@@ -254,10 +254,10 @@ def setUp(self) -> None:
         super().setUp()
         # TODO: ValueErrorを吐く作為的ではない自然な例の模索
         # 存在しないなら放置でよい
-        self.accent_phrase_hello = AccentPhrase.from_phonemes(
+        self.accent_phrase_hello = AccentPhrase.from_labels(
             self.phonemes_hello_hiho[1:10]
         )
-        self.accent_phrase_hiho = AccentPhrase.from_phonemes(
+        self.accent_phrase_hiho = AccentPhrase.from_labels(
             self.phonemes_hello_hiho[11:19]
         )
 
@@ -298,10 +298,10 @@ def test_labels(self):
 class TestBreathGroup(TestBasePhonemes):
     def setUp(self) -> None:
         super().setUp()
-        self.breath_group_hello = BreathGroup.from_phonemes(
+        self.breath_group_hello = BreathGroup.from_labels(
             self.phonemes_hello_hiho[1:10]
         )
-        self.breath_group_hiho = BreathGroup.from_phonemes(
+        self.breath_group_hiho = BreathGroup.from_labels(
             self.phonemes_hello_hiho[11:19]
         )
 
@@ -337,7 +337,7 @@ def test_labels(self):
 class TestUtterance(TestBasePhonemes):
     def setUp(self) -> None:
         super().setUp()
-        self.utterance_hello_hiho = Utterance.from_phonemes(self.phonemes_hello_hiho)
+        self.utterance_hello_hiho = Utterance.from_labels(self.phonemes_hello_hiho)
 
     def test_phonemes(self):
         self.assertEqual(
@@ -346,7 +346,7 @@ def test_phonemes(self):
             ),
             "sil k o N n i ch i w a pau h i h o d e s U sil",
         )
-        changed_utterance = Utterance.from_phonemes(self.utterance_hello_hiho.phonemes)
+        changed_utterance = Utterance.from_labels(self.utterance_hello_hiho.phonemes)
         self.assertEqual(len(changed_utterance.breath_groups), 2)
         accent_phrases = list(
             chain.from_iterable(
diff --git a/voicevox_engine/tts_pipeline/full_context_label.py b/voicevox_engine/tts_pipeline/full_context_label.py
index 6c1204d7b..ff843dcdc 100644
--- a/voicevox_engine/tts_pipeline/full_context_label.py
+++ b/voicevox_engine/tts_pipeline/full_context_label.py
@@ -7,36 +7,22 @@
 
 
 @dataclass
-class Phoneme:
+class Label:
     """
-    音素(母音・子音)クラス、音素の元となるcontextを保持する
-    音素には、母音や子音以外にも無音(silent/pause)も含まれる
+    OpenJTalk Label
 
     Attributes
     ----------
     contexts: dict[str, str]
-        音素の元
+        ラベルの属性
     """
 
     contexts: dict[str, str]
 
     @classmethod
-    def from_label(cls, label: str):
-        """
-        pyopenjtalk.extract_fullcontextで得られる音素の元(ラベル)から、Phonemeクラスを作成する
-        Parameters
-        ----------
-        label : str
-            pyopenjtalk.extract_fullcontextで得られるラベルを渡す
-
-        Returns
-        -------
-        phoneme: Phoneme
-            Phonemeクラスを返す
-        """
-
-        # フルコンテキストラベルの仕様は、
-        # http://hts.sp.nitech.ac.jp/?Download の HTS-2.3のJapanese tar.bz2 (126 MB)をダウンロードして、data/lab_format.pdfを見るとリストが見つかります。 # noqa
+    def from_feature(cls, feature: str):
+        """OpenJTalk feature から Label インスタンスを生成する"""
+        # フルコンテキストラベルの仕様は、http://hts.sp.nitech.ac.jp/?Download の HTS-2.3のJapanese tar.bz2 (126 MB)をダウンロードして、data/lab_format.pdfを見るとリストが見つかります。 # noqa
         contexts = re.search(
             r"^(?P<p1>.+?)\^(?P<p2>.+?)\-(?P<p3>.+?)\+(?P<p4>.+?)\=(?P<p5>.+?)"
             r"/A\:(?P<a1>.+?)\+(?P<a2>.+?)\+(?P<a3>.+?)"
@@ -50,33 +36,21 @@ def from_label(cls, label: str):
             r"/I\:(?P<i1>.+?)\-(?P<i2>.+?)\@(?P<i3>.+?)\+(?P<i4>.+?)\&(?P<i5>.+?)\-(?P<i6>.+?)\|(?P<i7>.+?)\+(?P<i8>.+?)"  # noqa
             r"/J\:(?P<j1>.+?)\_(?P<j2>.+?)"
             r"/K\:(?P<k1>.+?)\+(?P<k2>.+?)\-(?P<k3>.+?)$",
-            label,
+            feature,
         ).groupdict()
         return cls(contexts=contexts)
 
     @property
     def phoneme(self):
-        """
-        音素クラスの中で、発声に必要なcontextを返す
-        Returns
-        -------
-        phoneme : str
-            発声に必要なcontextを返す
-        """
+        """このラベルに含まれる音素。子音 or 母音 (無音含む)。"""
         return self.contexts["p3"]
 
     def is_pause(self):
-        """
-        音素がポーズ(無音、silent/pause)であるかを返す
-        Returns
-        -------
-        is_pose : bool
-            音素がポーズ(無音、silent/pause)であるか(True)否か(False)
-        """
+        """このラベルが無音 (silent/pause) であれば True、そうでなければ False を返す"""
         return self.contexts["f1"] == "xx"
 
     def __repr__(self):
-        return f"<Phoneme phoneme='{self.phoneme}'>"
+        return f"<Label phoneme='{self.phoneme}'>"
 
 
 @dataclass
@@ -87,18 +61,18 @@ class Mora:
 
     Attributes
     ----------
-    consonant : Phoneme | None
+    consonant : Label | None
         子音
-    vowel : Phoneme
+    vowel : Label
         母音
     """
 
-    consonant: Phoneme | None
-    vowel: Phoneme
+    consonant: Label | None
+    vowel: Label
 
     def set_context(self, key: str, value: str):
         """
-        Moraクラス内に含まれるPhonemeのcontextのうち、指定されたキーの値を変更する
+        Moraクラス内に含まれるLabelのcontextのうち、指定されたキーの値を変更する
         consonantが存在する場合は、vowelと同じようにcontextを変更する
         Parameters
         ----------
@@ -113,12 +87,8 @@ def set_context(self, key: str, value: str):
 
     @property
     def phonemes(self):
-        """
-        音素群を返す
-        Returns
-        -------
-        phonemes : list[Phoneme]
-            母音しかない場合は母音のみ、子音もある場合は子音、母音の順番でPhonemeのリストを返す
+        """このモーラを構成するラベルリスト。母音ラベルのみの場合は [母音ラベル,]、子音ラベルもある場合は [子音ラベル, 母音ラベル]。
+        NOTE: `.labels` に名称変更予定
         """
         if self.consonant is not None:
             return [self.consonant, self.vowel]
@@ -144,41 +114,38 @@ class AccentPhrase:
     is_interrogative: bool
 
     @classmethod
-    def from_phonemes(cls, phonemes: list[Phoneme]) -> Self:
-        """音素系列をcontextで区切りAccentPhraseインスタンスを生成する"""
+    def from_labels(cls, labels: list[Label]) -> Self:
+        """ラベル系列をcontextで区切りAccentPhraseインスタンスを生成する"""
 
-        # NOTE:「モーラごとの音素系列」は音素系列をcontextで区切り生成される。
+        # NOTE:「モーラごとのラベル系列」はラベル系列をcontextで区切り生成される。
 
         moras: list[Mora] = []  # モーラ系列
-        mora_phonemes: list[Phoneme] = []  # モーラごとの音素系列を一時保存するコンテナ
+        mora_labels: list[Label] = []  # モーラごとのラベル系列を一時保存するコンテナ
 
-        for phoneme, next_phoneme in zip(phonemes, phonemes[1:] + [None]):
+        for label, next_label in zip(labels, labels[1:] + [None]):
             # モーラ抽出を打ち切る（ワークアラウンド、VOICEVOX/voicevox_engine#57）
-            # context a2（モーラ番号）の最大値が 49 であるため、49番目以降のモーラでは音素のモーラ番号を区切りに使えない
-            if int(phoneme.contexts["a2"]) == 49:
+            # context a2（モーラ番号）の最大値が 49 であるため、49番目以降のモーラではラベルのモーラ番号を区切りに使えない
+            if int(label.contexts["a2"]) == 49:
                 break
 
-            # 区切りまで音素系列を一時保存する
-            mora_phonemes.append(phoneme)
+            # 区切りまでラベル系列を一時保存する
+            mora_labels.append(label)
 
-            # 一時的な音素系列を確定させて処理する
+            # 一時的なラベル系列を確定させて処理する
             # a2はアクセント句内でのモーラ番号(1~49)
-            if (
-                next_phoneme is None
-                or phoneme.contexts["a2"] != next_phoneme.contexts["a2"]
-            ):
-                # モーラごとの音素系列長に基づいて子音と母音を得る
-                if len(mora_phonemes) == 1:
-                    consonant, vowel = None, mora_phonemes[0]
-                elif len(mora_phonemes) == 2:
-                    consonant, vowel = mora_phonemes[0], mora_phonemes[1]
+            if next_label is None or label.contexts["a2"] != next_label.contexts["a2"]:
+                # モーラごとのラベル系列長に基づいて子音と母音を得る
+                if len(mora_labels) == 1:
+                    consonant, vowel = None, mora_labels[0]
+                elif len(mora_labels) == 2:
+                    consonant, vowel = mora_labels[0], mora_labels[1]
                 else:
-                    raise ValueError(mora_phonemes)
+                    raise ValueError(mora_labels)
                 # 子音と母音からモーラを生成して保存する
                 mora = Mora(consonant=consonant, vowel=vowel)
                 moras.append(mora)
                 # 次に向けてリセット
-                mora_phonemes = []
+                mora_labels = []
 
         # アクセント位置を決定する
         # f2はアクセント句のアクセント位置(1~49)
@@ -199,7 +166,7 @@ def from_phonemes(cls, phonemes: list[Phoneme]) -> Self:
 
     def set_context(self, key: str, value: str):
         """
-        AccentPhraseに間接的に含まれる全てのPhonemeのcontextの、指定されたキーの値を変更する
+        AccentPhraseに間接的に含まれる全てのLabelのcontextの、指定されたキーの値を変更する
         Parameters
         ----------
         key : str
@@ -213,11 +180,12 @@ def set_context(self, key: str, value: str):
     @property
     def phonemes(self):
         """
-        音素群を返す
+        内包する全てのラベルを返す
+        NOTE: `.labels` に名称変更予定
         Returns
         -------
-        phonemes : list[Phoneme]
-            AccentPhraseに間接的に含まれる全てのPhonemeを返す
+        labels : list[Label]
+            AccentPhraseに間接的に含まれる全てのLabelを返す
         """
         return list(chain.from_iterable(m.phonemes for m in self.moras))
 
@@ -236,31 +204,31 @@ class BreathGroup:
     accent_phrases: list[AccentPhrase]
 
     @classmethod
-    def from_phonemes(cls, phonemes: list[Phoneme]) -> Self:
-        """音素系列をcontextで区切りBreathGroupインスタンスを生成する"""
+    def from_labels(cls, labels: list[Label]) -> Self:
+        """ラベル系列をcontextで区切りBreathGroupインスタンスを生成する"""
 
-        # NOTE:「アクセント句ごとの音素系列」は音素系列をcontextで区切り生成される。
+        # NOTE:「アクセント句ごとのラベル系列」はラベル系列をcontextで区切り生成される。
 
         accent_phrases: list[AccentPhrase] = []  # アクセント句系列
-        accent_phonemes: list[Phoneme] = []  # アクセント句ごとの音素系列を一時保存するコンテナ
+        accent_labels: list[Label] = []  # アクセント句ごとのラベル系列を一時保存するコンテナ
 
-        for phoneme, next_phoneme in zip(phonemes, phonemes[1:] + [None]):
-            # 区切りまで音素系列を一時保存する
-            accent_phonemes.append(phoneme)
+        for label, next_label in zip(labels, labels[1:] + [None]):
+            # 区切りまでラベル系列を一時保存する
+            accent_labels.append(label)
 
-            # 一時的な音素系列を確定させて処理する
+            # 一時的なラベル系列を確定させて処理する
             # i3はBreathGroupの番号
             # f5はBreathGroup内でのアクセント句の番号
             if (
-                next_phoneme is None
-                or phoneme.contexts["i3"] != next_phoneme.contexts["i3"]
-                or phoneme.contexts["f5"] != next_phoneme.contexts["f5"]
+                next_label is None
+                or label.contexts["i3"] != next_label.contexts["i3"]
+                or label.contexts["f5"] != next_label.contexts["f5"]
             ):
                 # アクセント句を生成して保存する
-                accent_phrase = AccentPhrase.from_phonemes(accent_phonemes)
+                accent_phrase = AccentPhrase.from_labels(accent_labels)
                 accent_phrases.append(accent_phrase)
                 # 次に向けてリセット
-                accent_phonemes = []
+                accent_labels = []
 
         # BreathGroup インスタンスを生成する
         breath_group = cls(accent_phrases=accent_phrases)
@@ -269,7 +237,7 @@ def from_phonemes(cls, phonemes: list[Phoneme]) -> Self:
 
     def set_context(self, key: str, value: str):
         """
-        BreathGroupに間接的に含まれる全てのPhonemeのcontextの、指定されたキーの値を変更する
+        BreathGroupに間接的に含まれる全てのLabelのcontextの、指定されたキーの値を変更する
         Parameters
         ----------
         key : str
@@ -283,11 +251,12 @@ def set_context(self, key: str, value: str):
     @property
     def phonemes(self):
         """
-        音素群を返す
+        内包する全てのラベルを返す
+        NOTE: `.labels` に名称変更予定
         Returns
         -------
-        phonemes : list[Phoneme]
-            BreathGroupに間接的に含まれる全てのPhonemeを返す
+        labels : list[Label]
+            BreathGroupに間接的に含まれる全てのLabelを返す
         """
         return list(
             chain.from_iterable(
@@ -305,38 +274,38 @@ class Utterance:
     ----------
     breath_groups : list[BreathGroup]
         発声の区切りのリスト
-    pauses : list[Phoneme]
+    pauses : list[Label]
         無音のリスト
     """
 
     breath_groups: list[BreathGroup]
-    pauses: list[Phoneme]
+    pauses: list[Label]
 
     @classmethod
-    def from_phonemes(cls, phonemes: list[Phoneme]) -> Self:
-        """音素系列をポーズで区切りUtteranceインスタンスを生成する"""
+    def from_labels(cls, labels: list[Label]) -> Self:
+        """ラベル系列をポーズで区切りUtteranceインスタンスを生成する"""
 
-        # NOTE:「BreathGroupごとの音素系列」は音素系列をポーズで区切り生成される。
+        # NOTE:「BreathGroupごとのラベル系列」はラベル系列をポーズで区切り生成される。
 
-        pauses: list[Phoneme] = []  # ポーズ音素のリスト
+        pauses: list[Label] = []  # ポーズラベルのリスト
         breath_groups: list[BreathGroup] = []  # BreathGroup のリスト
-        group_phonemes: list[Phoneme] = []  # BreathGroupごとの音素系列を一時保存するコンテナ
+        group_labels: list[Label] = []  # BreathGroupごとのラベル系列を一時保存するコンテナ
 
-        for phoneme in phonemes:
-            # ポーズが出現するまで音素系列を一時保存する
-            if not phoneme.is_pause():
-                group_phonemes.append(phoneme)
+        for label in labels:
+            # ポーズが出現するまでラベル系列を一時保存する
+            if not label.is_pause():
+                group_labels.append(label)
 
-            # 一時的な音素系列を確定させて処理する
+            # 一時的なラベル系列を確定させて処理する
             else:
-                # ポーズ音素を保存する
-                pauses.append(phoneme)
-                if len(group_phonemes) > 0:
-                    # 音素系列からBreathGroupを生成して保存する
-                    breath_group = BreathGroup.from_phonemes(group_phonemes)
+                # ポーズラベルを保存する
+                pauses.append(label)
+                if len(group_labels) > 0:
+                    # ラベル系列からBreathGroupを生成して保存する
+                    breath_group = BreathGroup.from_labels(group_labels)
                     breath_groups.append(breath_group)
                     # 次に向けてリセット
-                    group_phonemes = []
+                    group_labels = []
 
         # Utteranceインスタンスを生成する
         utterance = cls(breath_groups=breath_groups, pauses=pauses)
@@ -345,7 +314,7 @@ def from_phonemes(cls, phonemes: list[Phoneme]) -> Self:
 
     def set_context(self, key: str, value: str):
         """
-        Utteranceに間接的に含まれる全てのPhonemeのcontextの、指定されたキーの値を変更する
+        Utteranceに間接的に含まれる全てのLabelのcontextの、指定されたキーの値を変更する
         Parameters
         ----------
         key : str
@@ -359,11 +328,12 @@ def set_context(self, key: str, value: str):
     @property
     def phonemes(self):
         """
-        音素群を返す
+        内包する全てのラベルを返す
+        NOTE: `.labels` に名称変更予定
         Returns
         -------
-        phonemes : list[Phoneme]
-            Utteranceクラスに直接的・間接的に含まれる、全てのPhonemeを返す
+        labels : list[Label]
+            Utteranceクラスに直接的・間接的に含まれる、全てのLabelを返す
         """
         accent_phrases = list(
             chain.from_iterable(
@@ -427,15 +397,15 @@ def phonemes(self):
             ),
         )
 
-        phonemes: list[Phoneme] = []
+        labels: list[Label] = []
         for i in range(len(self.pauses)):
             if self.pauses[i] is not None:
-                phonemes += [self.pauses[i]]
+                labels += [self.pauses[i]]
 
             if i < len(self.pauses) - 1:
-                phonemes += self.breath_groups[i].phonemes
+                labels += self.breath_groups[i].phonemes
 
-        return phonemes
+        return labels
 
 
 def extract_full_context_label(text: str):
@@ -450,7 +420,7 @@ def extract_full_context_label(text: str):
     utterance : Utterance
         発話
     """
-    labels = pyopenjtalk.extract_fullcontext(text)
-    phonemes = [Phoneme.from_label(label=label) for label in labels]
-    utterance = Utterance.from_phonemes(phonemes)
+    features: list[str] = pyopenjtalk.extract_fullcontext(text)
+    labels = [Label.from_feature(feature) for feature in features]
+    utterance = Utterance.from_labels(labels)
     return utterance

From 0f122e76276aae85c0b1842cb1dd94bcd52926e1 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Thu, 21 Dec 2023 14:24:41 +0900
Subject: [PATCH 046/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E7=96=91?=
 =?UTF-8?q?=E5=95=8F=E5=BD=A2=20upspeak=20=E3=81=AE=E5=8D=98=E7=B4=94?=
 =?UTF-8?q?=E5=8C=96=20(#904)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Refactor: アクセント句更新を部分置換に簡略化

* refactor: 関数ネストの解消

* refactor: 関数ネストの排除

* refactor: upspeak適用有無を関数化

* fix: lint

* refactor: deepcopy 範囲最小化

上位関数において query 自体が deepcopy されている。よって本関数における deepcopy は複製される last_mora のみで必要十分である。

* refactor: 疑問形更新の簡略化

* fix: lint

* refactor: 疑問形付与条件の明確化

* Apply suggestions from code review

---------

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 .../tts_pipeline/tts_engine_base.py           | 107 ++++++------------
 1 file changed, 32 insertions(+), 75 deletions(-)

diff --git a/voicevox_engine/tts_pipeline/tts_engine_base.py b/voicevox_engine/tts_pipeline/tts_engine_base.py
index 7ffca18a5..c7c5f21ff 100644
--- a/voicevox_engine/tts_pipeline/tts_engine_base.py
+++ b/voicevox_engine/tts_pipeline/tts_engine_base.py
@@ -9,6 +9,11 @@
 from .full_context_label import Utterance, extract_full_context_label
 from .mora_list import openjtalk_mora2text
 
+# 疑問文語尾定数
+UPSPEAK_LENGTH = 0.15
+UPSPEAK_PITCH_ADD = 0.3
+UPSPEAK_PITCH_MAX = 6.5
+
 
 def mora_to_text(mora: str) -> str:
     """
@@ -30,78 +35,31 @@ def mora_to_text(mora: str) -> str:
         return mora
 
 
-def adjust_interrogative_accent_phrases(
-    accent_phrases: List[AccentPhrase],
-) -> List[AccentPhrase]:
-    """
-    アクセント句系列の必要に応じて疑問系に補正
-    各accent_phraseの末尾のモーラより少し音の高い有声母音モーラを付与するすることで疑問文ぽくする
-    Parameters
-    ----------
-    accent_phrases : List[AccentPhrase]
-        アクセント句系列
-    Returns
-    -------
-    accent_phrases : List[AccentPhrase]
-        必要に応じて疑問形補正されたアクセント句系列
-    """
-    # NOTE: リファクタリング時に適切な場所へ移動させること
-    return [
-        AccentPhrase(
-            moras=adjust_interrogative_moras(accent_phrase),
-            accent=accent_phrase.accent,
-            pause_mora=accent_phrase.pause_mora,
-            is_interrogative=accent_phrase.is_interrogative,
-        )
-        for accent_phrase in accent_phrases
-    ]
-
-
-def adjust_interrogative_moras(accent_phrase: AccentPhrase) -> List[Mora]:
-    """
-    アクセント句に含まれるモーラ系列の必要に応じた疑問形補正
-    Parameters
-    ----------
-    accent_phrase : AccentPhrase
-        アクセント句
-    Returns
-    -------
-    moras : List[Mora]
-        補正済みモーラ系列
-    """
-    moras = copy.deepcopy(accent_phrase.moras)
-    # 疑問形補正条件: 疑問形フラグON & 終端有声母音
-    if accent_phrase.is_interrogative and not (len(moras) == 0 or moras[-1].pitch == 0):
-        interrogative_mora = make_interrogative_mora(moras[-1])
-        moras.append(interrogative_mora)
-        return moras
-    else:
-        return moras
-
+def apply_interrogative_upspeak(
+    accent_phrases: list[AccentPhrase], enable_interrogative_upspeak: bool
+) -> list[AccentPhrase]:
+    """必要に応じて各アクセント句の末尾へ疑問形モーラ（同一母音・継続長 0.15秒・音高↑）を付与する"""
+    # NOTE: 将来的にAudioQueryインスタンスを引数にする予定
+    if not enable_interrogative_upspeak:
+        return accent_phrases
 
-def make_interrogative_mora(last_mora: Mora) -> Mora:
-    """
-    疑問形用のモーラ（同一母音・継続長 0.15秒・音高↑）の生成
-    Parameters
-    ----------
-    last_mora : Mora
-        疑問形にするモーラ
-    Returns
-    -------
-    mora : Mora
-        疑問形用のモーラ
-    """
-    fix_vowel_length = 0.15
-    adjust_pitch = 0.3
-    max_pitch = 6.5
-    return Mora(
-        text=openjtalk_mora2text[last_mora.vowel],
-        consonant=None,
-        consonant_length=None,
-        vowel=last_mora.vowel,
-        vowel_length=fix_vowel_length,
-        pitch=min(last_mora.pitch + adjust_pitch, max_pitch),
-    )
+    for accent_phrase in accent_phrases:
+        moras = accent_phrase.moras
+        if len(moras) == 0:
+            continue
+        # 疑問形補正条件: 疑問形アクセント句 & 末尾有声モーラ
+        if accent_phrase.is_interrogative and moras[-1].pitch > 0:
+            last_mora = copy.deepcopy(moras[-1])
+            upspeak_mora = Mora(
+                text=openjtalk_mora2text[last_mora.vowel],
+                consonant=None,
+                consonant_length=None,
+                vowel=last_mora.vowel,
+                vowel_length=UPSPEAK_LENGTH,
+                pitch=min(last_mora.pitch + UPSPEAK_PITCH_ADD, UPSPEAK_PITCH_MAX),
+            )
+            accent_phrase.moras += [upspeak_mora]
+    return accent_phrases
 
 
 def full_context_label_moras_to_moras(
@@ -335,10 +293,9 @@ def synthesis(
         """
         # モーフィング時などに同一参照のqueryで複数回呼ばれる可能性があるので、元の引数のqueryに破壊的変更を行わない
         query = copy.deepcopy(query)
-        if enable_interrogative_upspeak:
-            query.accent_phrases = adjust_interrogative_accent_phrases(
-                query.accent_phrases
-            )
+        query.accent_phrases = apply_interrogative_upspeak(
+            query.accent_phrases, enable_interrogative_upspeak
+        )
         return self._synthesis_impl(query, style_id)
 
     @abstractmethod

From 7da0e381d54066f61436866dc6f909be83bcf0db Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Fri, 22 Dec 2023 09:23:29 +0900
Subject: [PATCH 047/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E5=90=88?=
 =?UTF-8?q?=E6=88=90=E3=82=A8=E3=83=B3=E3=82=B8=E3=83=B3=E7=94=9F=E6=88=90?=
 =?UTF-8?q?=E3=81=AE=E3=82=B3=E3=83=A1=E3=83=B3=E3=83=88=E8=BF=BD=E5=8A=A0?=
 =?UTF-8?q?=20(#868)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Refactor: 合成エンジン生成のコメント追加

* Fix: 引数名コメント

* Apply suggestions from code review

---------

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 .../tts_pipeline/make_tts_engines.py          | 24 +++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/voicevox_engine/tts_pipeline/make_tts_engines.py b/voicevox_engine/tts_pipeline/make_tts_engines.py
index 8ee63c907..c1477b73c 100644
--- a/voicevox_engine/tts_pipeline/make_tts_engines.py
+++ b/voicevox_engine/tts_pipeline/make_tts_engines.py
@@ -47,6 +47,8 @@ def make_synthesis_engines(
         )
         cpu_num_threads = 0
 
+    # ディレクトリを設定する
+    # 引数による指定を反映する
     if voicevox_dir is not None:
         if voicelib_dirs is not None:
             voicelib_dirs.append(voicevox_dir)
@@ -63,22 +65,33 @@ def make_synthesis_engines(
         if runtime_dirs is None:
             runtime_dirs = [root_dir]
 
+    # `~`をホームディレクトリのパスに置き換える
     voicelib_dirs = [p.expanduser() for p in voicelib_dirs]
     runtime_dirs = [p.expanduser() for p in runtime_dirs]
 
+    # ランタイムをロードする
     load_runtime_lib(runtime_dirs)
 
+    # コアをロードし `synthesis_engines` へ登録する
     synthesis_engines = {}
 
     if not enable_mock:
 
         def load_core_library(core_dir: Path, suppress_error: bool = False):
             """
-            指定されたディレクトリにあるコアを読み込む。
-            ユーザーディレクトリの場合は存在しないこともあるので、エラーを抑制すると良い。
+            指定されたコアをロードし `synthesis_engines` へ登録する。
+            Parameters
+            ----------
+            core_dir : Path
+                直下にコア（共有ライブラリ）が存在するディレクトリ、あるいはその候補
+            suppress_error: bool
+                エラーを抑制する。`core_dir` がコア候補であることを想定。
             """
+            # 指定されたコアをロードし登録する
             try:
+                # コアをロードする
                 core = CoreWrapper(use_gpu, core_dir, cpu_num_threads, load_all_models)
+                # コアを登録する
                 metas = json.loads(core.metas())
                 core_version = metas[0]["version"]
                 print(f"Info: Loading core {core_version}.")
@@ -90,13 +103,16 @@ def load_core_library(core_dir: Path, suppress_error: bool = False):
                 else:
                     synthesis_engines[core_version] = TTSEngine(core=core)
             except Exception:
+                # コアでなかった場合のエラーを抑制する
                 if not suppress_error:
                     raise
 
+        # `voicelib_dirs` 下のコアをロードし登録する
         for core_dir in voicelib_dirs:
             load_core_library(core_dir)
 
-        # ユーザーディレクトリにあるコアを読み込む
+        # ユーザーディレクトリ下のコアをロードし登録する
+        # コア候補を列挙する
         user_voicelib_dirs = []
         core_libraries_dir = get_save_dir() / "core_libraries"
         core_libraries_dir.mkdir(exist_ok=True)
@@ -105,7 +121,7 @@ def load_core_library(core_dir: Path, suppress_error: bool = False):
             if not path.is_dir():
                 continue
             user_voicelib_dirs.append(path)
-
+        # コア候補をロードし登録する。候補がコアで無かった場合のエラーを抑制する。
         for core_dir in user_voicelib_dirs:
             load_core_library(core_dir, suppress_error=True)
 

From fd9bbc21dacd78c816773ee299f07cd7a1299705 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Fri, 22 Dec 2023 09:31:34 +0900
Subject: [PATCH 048/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E3=82=B3?=
 =?UTF-8?q?=E3=82=A2=E7=AE=A1=E7=90=86=E6=A9=9F=E8=83=BD=E3=82=92=20`CoreE?=
 =?UTF-8?q?ngine`=20=E3=81=B8=E5=88=86=E5=89=B2=20(#872)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Refactor: コア管理機能を`CoreEngine` へ分割

* Fix: lint

* Fix: `CoreAdapter` への改名

* Add: 将来展望NOTE

* Fix: コア直接callの修正

* Fix: ラッパ関数名称変更
---
 voicevox_engine/tts_pipeline/tts_engine.py | 126 ++++++++++++++++-----
 1 file changed, 95 insertions(+), 31 deletions(-)

diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index 803f721e5..30afdddbb 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -249,8 +249,11 @@ def raw_wave_to_output_wave(query: AudioQuery, wave: ndarray, sr_wave: int) -> n
     return wave
 
 
-class TTSEngine(TTSEngineBase):
-    """音声合成器（core）の管理/実行/プロキシと音声合成フロー"""
+class CoreAdapter:
+    """
+    コアのアダプター。
+    ついでにコア内部で推論している処理をプロセスセーフにする。
+    """
 
     def __init__(self, core: CoreWrapper):
         super().__init__()
@@ -296,6 +299,82 @@ def is_initialized_style_id_synthesis(self, style_id: int) -> bool:
         except OldCoreError:
             return True  # コアが古い場合はどうしようもないのでTrueを返す
 
+    def safe_yukarin_s_forward(self, phoneme_list_s: ndarray, style_id: int) -> ndarray:
+        # TODO: `self.core.initialize_style_id_synthesis(style_id, skip_reinit=True)` のファサード的移植
+        with self.mutex:
+            phoneme_length = self.core.yukarin_s_forward(
+                length=len(phoneme_list_s),
+                phoneme_list=phoneme_list_s,
+                style_id=numpy.array(style_id, dtype=numpy.int64).reshape(-1),
+            )
+        return phoneme_length
+
+    def safe_yukarin_sa_forward(
+        self,
+        vowel_phoneme_list: ndarray,
+        consonant_phoneme_list: ndarray,
+        start_accent_list: ndarray,
+        end_accent_list: ndarray,
+        start_accent_phrase_list: ndarray,
+        end_accent_phrase_list: ndarray,
+        style_id: int,
+    ) -> ndarray:
+        # TODO: `self.core.initialize_style_id_synthesis(style_id, skip_reinit=True)` のファサード的移植
+        with self.mutex:
+            f0_list = self.core.yukarin_sa_forward(
+                length=vowel_phoneme_list.shape[0],
+                vowel_phoneme_list=vowel_phoneme_list[numpy.newaxis],
+                consonant_phoneme_list=consonant_phoneme_list[numpy.newaxis],
+                start_accent_list=start_accent_list[numpy.newaxis],
+                end_accent_list=end_accent_list[numpy.newaxis],
+                start_accent_phrase_list=start_accent_phrase_list[numpy.newaxis],
+                end_accent_phrase_list=end_accent_phrase_list[numpy.newaxis],
+                style_id=numpy.array(style_id, dtype=numpy.int64).reshape(-1),
+            )[0]
+        return f0_list
+
+    def safe_decode_forward(
+        self, phoneme: ndarray, f0: ndarray, style_id: int
+    ) -> tuple[ndarray, int]:
+        # TODO: `self.core.initialize_style_id_synthesis(style_id, skip_reinit=True)` のファサード的移植
+        with self.mutex:
+            wave = self.core.decode_forward(
+                length=phoneme.shape[0],
+                phoneme_size=phoneme.shape[1],
+                f0=f0[:, numpy.newaxis],
+                phoneme=phoneme,
+                style_id=numpy.array(style_id, dtype=numpy.int64).reshape(-1),
+            )
+        sr_wave = self.default_sampling_rate
+        return wave, sr_wave
+
+
+class TTSEngine(TTSEngineBase):
+    """音声合成器（core）の管理/実行/プロキシと音声合成フロー"""
+
+    def __init__(self, core: CoreWrapper):
+        super().__init__()
+        self.core = CoreAdapter(core)
+        # NOTE: self.coreは将来的に消す予定
+
+    @property
+    def default_sampling_rate(self) -> int:
+        return self.core.default_sampling_rate
+
+    @property
+    def speakers(self) -> str:
+        return self.core.speakers
+
+    @property
+    def supported_devices(self) -> str | None:
+        return self.core.supported_devices
+
+    def initialize_style_id_synthesis(self, style_id: int, skip_reinit: bool):
+        return self.core.initialize_style_id_synthesis(style_id, skip_reinit)
+
+    def is_initialized_style_id_synthesis(self, style_id: int) -> bool:
+        return self.core.is_initialized_style_id_synthesis(style_id)
+
     def replace_phoneme_length(
         self, accent_phrases: List[AccentPhrase], style_id: int
     ) -> List[AccentPhrase]:
@@ -313,7 +392,7 @@ def replace_phoneme_length(
             母音・子音の長さが設定されたアクセント句モデルのリスト
         """
         # モデルがロードされていない場合はロードする
-        self.initialize_style_id_synthesis(style_id, skip_reinit=True)
+        self.core.initialize_style_id_synthesis(style_id, skip_reinit=True)
         # phoneme
         # AccentPhraseをすべてMoraおよびOjtPhonemeの形に分解し、処理可能な形にする
         flatten_moras, phoneme_data_list = pre_process(accent_phrases)
@@ -326,12 +405,7 @@ def replace_phoneme_length(
             [p.phoneme_id for p in phoneme_data_list], dtype=numpy.int64
         )
         # Phoneme IDのリスト(phoneme_list_s)をyukarin_s_forwardにかけ、推論器によって適切な音素の長さを割り当てる
-        with self.mutex:
-            phoneme_length = self.core.yukarin_s_forward(
-                length=len(phoneme_list_s),
-                phoneme_list=phoneme_list_s,
-                style_id=numpy.array(style_id, dtype=numpy.int64).reshape(-1),
-            )
+        phoneme_length = self.core.safe_yukarin_s_forward(phoneme_list_s, style_id)
 
         # yukarin_s_forwarderの結果をaccent_phrasesに反映する
         # flatten_moras変数に展開された値を変更することでコード量を削減しつつaccent_phrases内のデータを書き換えている
@@ -362,7 +436,7 @@ def replace_mora_pitch(
             音高(ピッチ)が設定されたアクセント句モデルのリスト
         """
         # モデルがロードされていない場合はロードする
-        self.initialize_style_id_synthesis(style_id, skip_reinit=True)
+        self.core.initialize_style_id_synthesis(style_id, skip_reinit=True)
         # numpy.concatenateが空リストだとエラーを返すのでチェック
         if len(accent_phrases) == 0:
             return []
@@ -463,17 +537,15 @@ def _create_one_hot(accent_phrase: AccentPhrase, position: int):
         )
 
         # 今までに生成された情報をyukarin_sa_forwardにかけ、推論器によってモーラごとに適切な音高(ピッチ)を割り当てる
-        with self.mutex:
-            f0_list = self.core.yukarin_sa_forward(
-                length=vowel_phoneme_list.shape[0],
-                vowel_phoneme_list=vowel_phoneme_list[numpy.newaxis],
-                consonant_phoneme_list=consonant_phoneme_list[numpy.newaxis],
-                start_accent_list=start_accent_list[numpy.newaxis],
-                end_accent_list=end_accent_list[numpy.newaxis],
-                start_accent_phrase_list=start_accent_phrase_list[numpy.newaxis],
-                end_accent_phrase_list=end_accent_phrase_list[numpy.newaxis],
-                style_id=numpy.array(style_id, dtype=numpy.int64).reshape(-1),
-            )[0]
+        f0_list = self.core.safe_yukarin_sa_forward(
+            vowel_phoneme_list,
+            consonant_phoneme_list,
+            start_accent_list,
+            end_accent_list,
+            start_accent_phrase_list,
+            end_accent_phrase_list,
+            style_id,
+        )
 
         # 無声母音を含むMoraに関しては、音高(ピッチ)を0にする
         for i, p in enumerate(vowel_phoneme_data_list):
@@ -502,20 +574,12 @@ def _synthesis_impl(self, query: AudioQuery, style_id: int):
             音声合成結果
         """
         # モデルがロードされていない場合はロードする
-        self.initialize_style_id_synthesis(style_id, skip_reinit=True)
+        self.core.initialize_style_id_synthesis(style_id, skip_reinit=True)
 
         phoneme, f0 = query_to_decoder_feature(query)
 
         # 今まで生成された情報をdecode_forwardにかけ、推論器によって音声波形を生成する
-        with self.mutex:
-            raw_wave = self.core.decode_forward(
-                length=phoneme.shape[0],
-                phoneme_size=phoneme.shape[1],
-                f0=f0[:, numpy.newaxis],
-                phoneme=phoneme,
-                style_id=numpy.array(style_id, dtype=numpy.int64).reshape(-1),
-            )
-            sr_raw_wave = self.default_sampling_rate
+        raw_wave, sr_raw_wave = self.core.safe_decode_forward(phoneme, f0, style_id)
 
         wave = raw_wave_to_output_wave(query, raw_wave, sr_raw_wave)
 

From 07e3be612447a8a8d391e66df524dcfbe2fdf89e Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Fri, 22 Dec 2023 11:31:41 +0900
Subject: [PATCH 049/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`test=5Ffull=5Fco?=
 =?UTF-8?q?ntext=5Flabel.py`=20=E7=B0=A1=E7=95=A5=E5=8C=96=20(#906)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: `join` の関数切り出し

* refactor: テスト util 関数の廃止
---
 test/test_full_context_label.py | 91 ++++++++++++++++-----------------
 1 file changed, 43 insertions(+), 48 deletions(-)

diff --git a/test/test_full_context_label.py b/test/test_full_context_label.py
index 77acad8b9..3456c45df 100644
--- a/test/test_full_context_label.py
+++ b/test/test_full_context_label.py
@@ -29,7 +29,11 @@ def contexts_to_feature(contexts: dict[str, str]) -> str:
     ).format(**contexts)
 
 
-def features(ojt_container: Mora | AccentPhrase | BreathGroup | Utterance):
+# OpenJTalk コンテナクラス
+OjtContainer = Mora | AccentPhrase | BreathGroup | Utterance
+
+
+def features(ojt_container: OjtContainer):
     """コンテナインスタンスに直接的・間接的に含まれる全ての feature を返す"""
     return [contexts_to_feature(p.contexts) for p in ojt_container.phonemes]
 
@@ -127,6 +131,20 @@ def setUp(self):
         ]
 
 
+def jointed_phonemes(ojt_container: OjtContainer) -> str:
+    """コンテナインスタンスに直接的・間接的に含まれる全ラベルの音素文字を結合してを返す"""
+    return "".join([label.phoneme for label in ojt_container.phonemes])
+    # NOTE: `.phonemes` は `.labels` にリネーム予定
+    # return "".join([label.phoneme for label in ojt_container.labels])
+
+
+def space_jointed_phonemes(ojt_container: OjtContainer) -> str:
+    """コンテナインスタンスに直接的・間接的に含まれる全ラベルの音素文字を ` ` 挟みながら結合してを返す"""
+    return " ".join([label.phoneme for label in ojt_container.phonemes])
+    # NOTE: `.phonemes` は `.labels` にリネーム予定
+    # return " ".join([label.phoneme for label in ojt_container.labels])
+
+
 class TestPhoneme(TestBasePhonemes):
     def test_phoneme(self):
         self.assertEqual(
@@ -209,26 +227,21 @@ def setUp(self) -> None:
             consonant=self.phonemes_hello_hiho[17], vowel=self.phonemes_hello_hiho[18]
         )
 
-    def assert_phonemes(self, mora: Mora, mora_str: str) -> None:
-        self.assertEqual(
-            "".join([phoneme.phoneme for phoneme in mora.phonemes]), mora_str
-        )
-
     def assert_labels(self, mora: Mora, label_start: int, label_end: int) -> None:
         self.assertEqual(
             features(mora), self.test_case_hello_hiho[label_start:label_end]
         )
 
     def test_phonemes(self) -> None:
-        self.assert_phonemes(self.mora_hello_1, "ko")
-        self.assert_phonemes(self.mora_hello_2, "N")
-        self.assert_phonemes(self.mora_hello_3, "ni")
-        self.assert_phonemes(self.mora_hello_4, "chi")
-        self.assert_phonemes(self.mora_hello_5, "wa")
-        self.assert_phonemes(self.mora_hiho_1, "hi")
-        self.assert_phonemes(self.mora_hiho_2, "ho")
-        self.assert_phonemes(self.mora_hiho_3, "de")
-        self.assert_phonemes(self.mora_hiho_4, "sU")
+        self.assertEqual(jointed_phonemes(self.mora_hello_1), "ko")
+        self.assertEqual(jointed_phonemes(self.mora_hello_2), "N")
+        self.assertEqual(jointed_phonemes(self.mora_hello_3), "ni")
+        self.assertEqual(jointed_phonemes(self.mora_hello_4), "chi")
+        self.assertEqual(jointed_phonemes(self.mora_hello_5), "wa")
+        self.assertEqual(jointed_phonemes(self.mora_hiho_1), "hi")
+        self.assertEqual(jointed_phonemes(self.mora_hiho_2), "ho")
+        self.assertEqual(jointed_phonemes(self.mora_hiho_3), "de")
+        self.assertEqual(jointed_phonemes(self.mora_hiho_4), "sU")
 
     def test_labels(self) -> None:
         self.assert_labels(self.mora_hello_1, 1, 3)
@@ -246,7 +259,7 @@ def test_set_context(self):
         mora_hello_1 = deepcopy(self.mora_hello_1)
         # phonemeにあたる"p3"を書き換える
         mora_hello_1.set_context("p3", "a")
-        self.assert_phonemes(mora_hello_1, "aa")
+        self.assertEqual(jointed_phonemes(mora_hello_1), "aa")
 
 
 class TestAccentPhrase(TestBasePhonemes):
@@ -269,22 +282,13 @@ def test_set_context(self):
         accent_phrase_hello = deepcopy(self.accent_phrase_hello)
         # phonemeにあたる"p3"を書き換える
         accent_phrase_hello.set_context("p3", "a")
-        self.assertEqual(
-            "".join([phoneme.phoneme for phoneme in accent_phrase_hello.phonemes]),
-            "aaaaaaaaa",
-        )
+        self.assertEqual(jointed_phonemes(accent_phrase_hello), "aaaaaaaaa")
 
     def test_phonemes(self):
-        self.assertEqual(
-            " ".join(
-                [phoneme.phoneme for phoneme in self.accent_phrase_hello.phonemes]
-            ),
-            "k o N n i ch i w a",
-        )
-        self.assertEqual(
-            " ".join([phoneme.phoneme for phoneme in self.accent_phrase_hiho.phonemes]),
-            "h i h o d e s U",
-        )
+        outputs_hello = space_jointed_phonemes(self.accent_phrase_hello)
+        outputs_hiho = space_jointed_phonemes(self.accent_phrase_hiho)
+        self.assertEqual(outputs_hello, "k o N n i ch i w a")
+        self.assertEqual(outputs_hiho, "h i h o d e s U")
 
     def test_labels(self):
         self.assertEqual(
@@ -310,20 +314,13 @@ def test_set_context(self):
         breath_group_hello = deepcopy(self.breath_group_hello)
         # phonemeにあたる"p3"を書き換える
         breath_group_hello.set_context("p3", "a")
-        self.assertEqual(
-            "".join([phoneme.phoneme for phoneme in breath_group_hello.phonemes]),
-            "aaaaaaaaa",
-        )
+        self.assertEqual(jointed_phonemes(breath_group_hello), "aaaaaaaaa")
 
     def test_phonemes(self):
-        self.assertEqual(
-            " ".join([phoneme.phoneme for phoneme in self.breath_group_hello.phonemes]),
-            "k o N n i ch i w a",
-        )
-        self.assertEqual(
-            " ".join([phoneme.phoneme for phoneme in self.breath_group_hiho.phonemes]),
-            "h i h o d e s U",
-        )
+        outputs_hello = space_jointed_phonemes(self.breath_group_hello)
+        outputs_hiho = space_jointed_phonemes(self.breath_group_hiho)
+        self.assertEqual(outputs_hello, "k o N n i ch i w a")
+        self.assertEqual(outputs_hiho, "h i h o d e s U")
 
     def test_labels(self):
         self.assertEqual(
@@ -340,12 +337,10 @@ def setUp(self) -> None:
         self.utterance_hello_hiho = Utterance.from_labels(self.phonemes_hello_hiho)
 
     def test_phonemes(self):
-        self.assertEqual(
-            " ".join(
-                [phoneme.phoneme for phoneme in self.utterance_hello_hiho.phonemes]
-            ),
-            "sil k o N n i ch i w a pau h i h o d e s U sil",
-        )
+        outputs_hello_hiho = space_jointed_phonemes(self.utterance_hello_hiho)
+        expects_hello_hiho = "sil k o N n i ch i w a pau h i h o d e s U sil"
+        self.assertEqual(outputs_hello_hiho, expects_hello_hiho)
+
         changed_utterance = Utterance.from_labels(self.utterance_hello_hiho.phonemes)
         self.assertEqual(len(changed_utterance.breath_groups), 2)
         accent_phrases = list(

From 7ab92263adf99b07d9cde5d58426356e2fbaa660 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Fri, 22 Dec 2023 11:38:27 +0900
Subject: [PATCH 050/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`replace=5Fphonem?=
 =?UTF-8?q?e=5Flength()`=20=E3=81=AE=E8=A6=8B=E9=80=9A=E3=81=97=E6=94=B9?=
 =?UTF-8?q?=E5=96=84=20(#909)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: 関数ネストの削除、変数名の簡略化、関数配置変更

* fix: 変数名上書き

* Update voicevox_engine/tts_pipeline/tts_engine.py

* Update voicevox_engine/tts_pipeline/tts_engine.py

* コンフリクト解消ミス

---------

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 voicevox_engine/tts_pipeline/tts_engine.py | 63 +++++++++-------------
 1 file changed, 26 insertions(+), 37 deletions(-)

diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index 30afdddbb..b92432255 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -376,46 +376,35 @@ def is_initialized_style_id_synthesis(self, style_id: int) -> bool:
         return self.core.is_initialized_style_id_synthesis(style_id)
 
     def replace_phoneme_length(
-        self, accent_phrases: List[AccentPhrase], style_id: int
-    ) -> List[AccentPhrase]:
-        """
-        accent_phrasesの母音・子音の長さを設定する
-        Parameters
-        ----------
-        accent_phrases : List[AccentPhrase]
-            アクセント句モデルのリスト
-        style_id : int
-            スタイルID
-        Returns
-        -------
-        accent_phrases : List[AccentPhrase]
-            母音・子音の長さが設定されたアクセント句モデルのリスト
-        """
+        self, accent_phrases: list[AccentPhrase], style_id: int
+    ) -> list[AccentPhrase]:
+        """アクセント句系列に含まれるモーラの音素長属性をスタイルに合わせて更新する"""
         # モデルがロードされていない場合はロードする
         self.core.initialize_style_id_synthesis(style_id, skip_reinit=True)
-        # phoneme
-        # AccentPhraseをすべてMoraおよびOjtPhonemeの形に分解し、処理可能な形にする
-        flatten_moras, phoneme_data_list = pre_process(accent_phrases)
-        # OjtPhonemeの形に分解されたもの(phoneme_data_list)から、vowel(母音)の位置を抜き出す
-        _, _, vowel_indexes_data = split_mora(phoneme_data_list)
 
-        # yukarin_s
-        # OjtPhonemeのリストからOjtPhonemeのPhoneme ID(OpenJTalkにおける音素のID)のリストを作る
-        phoneme_list_s = numpy.array(
-            [p.phoneme_id for p in phoneme_data_list], dtype=numpy.int64
-        )
-        # Phoneme IDのリスト(phoneme_list_s)をyukarin_s_forwardにかけ、推論器によって適切な音素の長さを割り当てる
-        phoneme_length = self.core.safe_yukarin_s_forward(phoneme_list_s, style_id)
-
-        # yukarin_s_forwarderの結果をaccent_phrasesに反映する
-        # flatten_moras変数に展開された値を変更することでコード量を削減しつつaccent_phrases内のデータを書き換えている
-        for i, mora in enumerate(flatten_moras):
-            mora.consonant_length = (
-                phoneme_length[vowel_indexes_data[i + 1] - 1]
-                if mora.consonant is not None
-                else None
-            )
-            mora.vowel_length = phoneme_length[vowel_indexes_data[i + 1]]
+        # モーラ系列を抽出する
+        moras = to_flatten_moras(accent_phrases)
+
+        # 音素系列を抽出し前後無音を付加する
+        phonemes = to_flatten_phonemes(moras)
+        phonemes = [OjtPhoneme("pau")] + phonemes + [OjtPhoneme("pau")]
+
+        # 音素クラスから音素IDスカラへ表現を変換する
+        phoneme_ids = numpy.array([p.phoneme_id for p in phonemes], dtype=numpy.int64)
+
+        # コアを用いて音素長を生成する
+        phoneme_lengths = self.core.safe_yukarin_s_forward(phoneme_ids, style_id)
+
+        # 生成結果でモーラ内の音素長属性を置換する
+        vowel_indexes = [
+            i for i, p in enumerate(phonemes) if p.phoneme in mora_phoneme_list
+        ]
+        for i, mora in enumerate(moras):
+            if mora.consonant is None:
+                mora.consonant_length = None
+            else:
+                mora.consonant_length = phoneme_lengths[vowel_indexes[i + 1] - 1]
+            mora.vowel_length = phoneme_lengths[vowel_indexes[i + 1]]
 
         return accent_phrases
 

From 2fda2ca59df39cc4a343e71e4684bf0da3ff6dda Mon Sep 17 00:00:00 2001
From: sabonerune <102559104+sabonerune@users.noreply.github.com>
Date: Fri, 22 Dec 2023 11:43:16 +0900
Subject: [PATCH 051/177] =?UTF-8?q?ENH:=20Numpy=E3=82=92=E6=9B=B4=E6=96=B0?=
 =?UTF-8?q?=20(#911)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 poetry.lock              | 63 +++++++++++++++++++++++-----------------
 requirements-dev.txt     |  2 +-
 requirements-license.txt |  2 +-
 requirements-test.txt    |  2 +-
 requirements.txt         |  2 +-
 5 files changed, 41 insertions(+), 30 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 128c9d660..77bc5f94a 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1267,36 +1267,47 @@ setuptools = "*"
 
 [[package]]
 name = "numpy"
-version = "1.25.2"
+version = "1.26.2"
 description = "Fundamental package for array computing in Python"
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "numpy-1.25.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:db3ccc4e37a6873045580d413fe79b68e47a681af8db2e046f1dacfa11f86eb3"},
-    {file = "numpy-1.25.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:90319e4f002795ccfc9050110bbbaa16c944b1c37c0baeea43c5fb881693ae1f"},
-    {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfe4a913e29b418d096e696ddd422d8a5d13ffba4ea91f9f60440a3b759b0187"},
-    {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f08f2e037bba04e707eebf4bc934f1972a315c883a9e0ebfa8a7756eabf9e357"},
-    {file = "numpy-1.25.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bec1e7213c7cb00d67093247f8c4db156fd03075f49876957dca4711306d39c9"},
-    {file = "numpy-1.25.2-cp310-cp310-win32.whl", hash = "sha256:7dc869c0c75988e1c693d0e2d5b26034644399dd929bc049db55395b1379e044"},
-    {file = "numpy-1.25.2-cp310-cp310-win_amd64.whl", hash = "sha256:834b386f2b8210dca38c71a6e0f4fd6922f7d3fcff935dbe3a570945acb1b545"},
-    {file = "numpy-1.25.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c5462d19336db4560041517dbb7759c21d181a67cb01b36ca109b2ae37d32418"},
-    {file = "numpy-1.25.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c5652ea24d33585ea39eb6a6a15dac87a1206a692719ff45d53c5282e66d4a8f"},
-    {file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d60fbae8e0019865fc4784745814cff1c421df5afee233db6d88ab4f14655a2"},
-    {file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60e7f0f7f6d0eee8364b9a6304c2845b9c491ac706048c7e8cf47b83123b8dbf"},
-    {file = "numpy-1.25.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:bb33d5a1cf360304754913a350edda36d5b8c5331a8237268c48f91253c3a364"},
-    {file = "numpy-1.25.2-cp311-cp311-win32.whl", hash = "sha256:5883c06bb92f2e6c8181df7b39971a5fb436288db58b5a1c3967702d4278691d"},
-    {file = "numpy-1.25.2-cp311-cp311-win_amd64.whl", hash = "sha256:5c97325a0ba6f9d041feb9390924614b60b99209a71a69c876f71052521d42a4"},
-    {file = "numpy-1.25.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b79e513d7aac42ae918db3ad1341a015488530d0bb2a6abcbdd10a3a829ccfd3"},
-    {file = "numpy-1.25.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:eb942bfb6f84df5ce05dbf4b46673ffed0d3da59f13635ea9b926af3deb76926"},
-    {file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e0746410e73384e70d286f93abf2520035250aad8c5714240b0492a7302fdca"},
-    {file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7806500e4f5bdd04095e849265e55de20d8cc4b661b038957354327f6d9b295"},
-    {file = "numpy-1.25.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8b77775f4b7df768967a7c8b3567e309f617dd5e99aeb886fa14dc1a0791141f"},
-    {file = "numpy-1.25.2-cp39-cp39-win32.whl", hash = "sha256:2792d23d62ec51e50ce4d4b7d73de8f67a2fd3ea710dcbc8563a51a03fb07b01"},
-    {file = "numpy-1.25.2-cp39-cp39-win_amd64.whl", hash = "sha256:76b4115d42a7dfc5d485d358728cdd8719be33cc5ec6ec08632a5d6fca2ed380"},
-    {file = "numpy-1.25.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1a1329e26f46230bf77b02cc19e900db9b52f398d6722ca853349a782d4cff55"},
-    {file = "numpy-1.25.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c3abc71e8b6edba80a01a52e66d83c5d14433cbcd26a40c329ec7ed09f37901"},
-    {file = "numpy-1.25.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:1b9735c27cea5d995496f46a8b1cd7b408b3f34b6d50459d9ac8fe3a20cc17bf"},
-    {file = "numpy-1.25.2.tar.gz", hash = "sha256:fd608e19c8d7c55021dffd43bfe5492fab8cc105cc8986f813f8c3c048b38760"},
+    {file = "numpy-1.26.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3703fc9258a4a122d17043e57b35e5ef1c5a5837c3db8be396c82e04c1cf9b0f"},
+    {file = "numpy-1.26.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cc392fdcbd21d4be6ae1bb4475a03ce3b025cd49a9be5345d76d7585aea69440"},
+    {file = "numpy-1.26.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36340109af8da8805d8851ef1d74761b3b88e81a9bd80b290bbfed61bd2b4f75"},
+    {file = "numpy-1.26.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bcc008217145b3d77abd3e4d5ef586e3bdfba8fe17940769f8aa09b99e856c00"},
+    {file = "numpy-1.26.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3ced40d4e9e18242f70dd02d739e44698df3dcb010d31f495ff00a31ef6014fe"},
+    {file = "numpy-1.26.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b272d4cecc32c9e19911891446b72e986157e6a1809b7b56518b4f3755267523"},
+    {file = "numpy-1.26.2-cp310-cp310-win32.whl", hash = "sha256:22f8fc02fdbc829e7a8c578dd8d2e15a9074b630d4da29cda483337e300e3ee9"},
+    {file = "numpy-1.26.2-cp310-cp310-win_amd64.whl", hash = "sha256:26c9d33f8e8b846d5a65dd068c14e04018d05533b348d9eaeef6c1bd787f9919"},
+    {file = "numpy-1.26.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b96e7b9c624ef3ae2ae0e04fa9b460f6b9f17ad8b4bec6d7756510f1f6c0c841"},
+    {file = "numpy-1.26.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:aa18428111fb9a591d7a9cc1b48150097ba6a7e8299fb56bdf574df650e7d1f1"},
+    {file = "numpy-1.26.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06fa1ed84aa60ea6ef9f91ba57b5ed963c3729534e6e54055fc151fad0423f0a"},
+    {file = "numpy-1.26.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96ca5482c3dbdd051bcd1fce8034603d6ebfc125a7bd59f55b40d8f5d246832b"},
+    {file = "numpy-1.26.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:854ab91a2906ef29dc3925a064fcd365c7b4da743f84b123002f6139bcb3f8a7"},
+    {file = "numpy-1.26.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f43740ab089277d403aa07567be138fc2a89d4d9892d113b76153e0e412409f8"},
+    {file = "numpy-1.26.2-cp311-cp311-win32.whl", hash = "sha256:a2bbc29fcb1771cd7b7425f98b05307776a6baf43035d3b80c4b0f29e9545186"},
+    {file = "numpy-1.26.2-cp311-cp311-win_amd64.whl", hash = "sha256:2b3fca8a5b00184828d12b073af4d0fc5fdd94b1632c2477526f6bd7842d700d"},
+    {file = "numpy-1.26.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a4cd6ed4a339c21f1d1b0fdf13426cb3b284555c27ac2f156dfdaaa7e16bfab0"},
+    {file = "numpy-1.26.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5d5244aabd6ed7f312268b9247be47343a654ebea52a60f002dc70c769048e75"},
+    {file = "numpy-1.26.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a3cdb4d9c70e6b8c0814239ead47da00934666f668426fc6e94cce869e13fd7"},
+    {file = "numpy-1.26.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa317b2325f7aa0a9471663e6093c210cb2ae9c0ad824732b307d2c51983d5b6"},
+    {file = "numpy-1.26.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:174a8880739c16c925799c018f3f55b8130c1f7c8e75ab0a6fa9d41cab092fd6"},
+    {file = "numpy-1.26.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f79b231bf5c16b1f39c7f4875e1ded36abee1591e98742b05d8a0fb55d8a3eec"},
+    {file = "numpy-1.26.2-cp312-cp312-win32.whl", hash = "sha256:4a06263321dfd3598cacb252f51e521a8cb4b6df471bb12a7ee5cbab20ea9167"},
+    {file = "numpy-1.26.2-cp312-cp312-win_amd64.whl", hash = "sha256:b04f5dc6b3efdaab541f7857351aac359e6ae3c126e2edb376929bd3b7f92d7e"},
+    {file = "numpy-1.26.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4eb8df4bf8d3d90d091e0146f6c28492b0be84da3e409ebef54349f71ed271ef"},
+    {file = "numpy-1.26.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1a13860fdcd95de7cf58bd6f8bc5a5ef81c0b0625eb2c9a783948847abbef2c2"},
+    {file = "numpy-1.26.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64308ebc366a8ed63fd0bf426b6a9468060962f1a4339ab1074c228fa6ade8e3"},
+    {file = "numpy-1.26.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baf8aab04a2c0e859da118f0b38617e5ee65d75b83795055fb66c0d5e9e9b818"},
+    {file = "numpy-1.26.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d73a3abcac238250091b11caef9ad12413dab01669511779bc9b29261dd50210"},
+    {file = "numpy-1.26.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b361d369fc7e5e1714cf827b731ca32bff8d411212fccd29ad98ad622449cc36"},
+    {file = "numpy-1.26.2-cp39-cp39-win32.whl", hash = "sha256:bd3f0091e845164a20bd5a326860c840fe2af79fa12e0469a12768a3ec578d80"},
+    {file = "numpy-1.26.2-cp39-cp39-win_amd64.whl", hash = "sha256:2beef57fb031dcc0dc8fa4fe297a742027b954949cabb52a2a376c144e5e6060"},
+    {file = "numpy-1.26.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1cc3d5029a30fb5f06704ad6b23b35e11309491c999838c31f124fee32107c79"},
+    {file = "numpy-1.26.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94cc3c222bb9fb5a12e334d0479b97bb2df446fbe622b470928f5284ffca3f8d"},
+    {file = "numpy-1.26.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:fe6b44fb8fcdf7eda4ef4461b97b3f63c466b27ab151bec2366db8b197387841"},
+    {file = "numpy-1.26.2.tar.gz", hash = "sha256:f65738447676ab5777f11e6bbbdb8ce11b785e105f690bc45966574816b6d3ea"},
 ]
 
 [[package]]
diff --git a/requirements-dev.txt b/requirements-dev.txt
index a42435195..cfdef2322 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -35,7 +35,7 @@ markupsafe==2.1.3 ; python_version >= "3.11" and python_version < "3.12"
 more-itertools==10.1.0 ; python_version >= "3.11" and python_version < "3.12"
 msgpack==1.0.5 ; python_version >= "3.11" and python_version < "3.12"
 nodeenv==1.8.0 ; python_version >= "3.11" and python_version < "3.12"
-numpy==1.25.2 ; python_version >= "3.11" and python_version < "3.12"
+numpy==1.26.2 ; python_version >= "3.11" and python_version < "3.12"
 packaging==23.1 ; python_version >= "3.11" and python_version < "3.12"
 pefile==2023.2.7 ; python_version >= "3.11" and python_version < "3.12" and sys_platform == "win32"
 pexpect==4.8.0 ; python_version >= "3.11" and python_version < "3.12"
diff --git a/requirements-license.txt b/requirements-license.txt
index 9d58db2b5..2269d3aec 100644
--- a/requirements-license.txt
+++ b/requirements-license.txt
@@ -12,7 +12,7 @@ h11==0.14.0 ; python_version >= "3.11" and python_version < "3.12"
 idna==3.4 ; python_version >= "3.11" and python_version < "3.12"
 jinja2==3.1.2 ; python_version >= "3.11" and python_version < "3.12"
 markupsafe==2.1.3 ; python_version >= "3.11" and python_version < "3.12"
-numpy==1.25.2 ; python_version >= "3.11" and python_version < "3.12"
+numpy==1.26.2 ; python_version >= "3.11" and python_version < "3.12"
 pip-licenses==4.3.2 ; python_version >= "3.11" and python_version < "3.12"
 platformdirs==3.10.0 ; python_version >= "3.11" and python_version < "3.12"
 prettytable==3.8.0 ; python_version >= "3.11" and python_version < "3.12"
diff --git a/requirements-test.txt b/requirements-test.txt
index 6f891c187..f2b18f0e2 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -47,7 +47,7 @@ more-itertools==10.1.0 ; python_version >= "3.11" and python_version < "3.12"
 msgpack==1.0.5 ; python_version >= "3.11" and python_version < "3.12"
 mypy-extensions==1.0.0 ; python_version >= "3.11" and python_version < "3.12"
 mypy==1.6.0 ; python_version >= "3.11" and python_version < "3.12"
-numpy==1.25.2 ; python_version >= "3.11" and python_version < "3.12"
+numpy==1.26.2 ; python_version >= "3.11" and python_version < "3.12"
 packaging==23.1 ; python_version >= "3.11" and python_version < "3.12"
 pathspec==0.11.2 ; python_version >= "3.11" and python_version < "3.12"
 pexpect==4.8.0 ; python_version >= "3.11" and python_version < "3.12"
diff --git a/requirements.txt b/requirements.txt
index 047c593c9..51add151b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,7 +12,7 @@ h11==0.14.0 ; python_version >= "3.11" and python_version < "3.12"
 idna==3.4 ; python_version >= "3.11" and python_version < "3.12"
 jinja2==3.1.2 ; python_version >= "3.11" and python_version < "3.12"
 markupsafe==2.1.3 ; python_version >= "3.11" and python_version < "3.12"
-numpy==1.25.2 ; python_version >= "3.11" and python_version < "3.12"
+numpy==1.26.2 ; python_version >= "3.11" and python_version < "3.12"
 platformdirs==3.10.0 ; python_version >= "3.11" and python_version < "3.12"
 pycparser==2.21 ; python_version >= "3.11" and python_version < "3.12"
 pydantic==1.10.12 ; python_version >= "3.11" and python_version < "3.12"

From 755aaff5958dce21259adae1fc95c02bac1e5a45 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Fri, 22 Dec 2023 11:48:20 +0900
Subject: [PATCH 052/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E9=87=8D?=
 =?UTF-8?q?=E8=A4=87=E6=B3=A2=E5=BD=A2=E5=90=88=E6=88=90=E3=83=86=E3=82=B9?=
 =?UTF-8?q?=E3=83=88=E3=81=AE=E5=BB=83=E6=AD=A2=20(#912)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

refactor: 重複波形合成の廃止
---
 test/test_synthesis_engine.py | 190 ----------------------------------
 1 file changed, 190 deletions(-)

diff --git a/test/test_synthesis_engine.py b/test/test_synthesis_engine.py
index ed6345674..98a941d9f 100644
--- a/test/test_synthesis_engine.py
+++ b/test/test_synthesis_engine.py
@@ -1,6 +1,4 @@
-import math
 from copy import deepcopy
-from random import random
 from typing import Union
 from unittest import TestCase
 from unittest.mock import Mock
@@ -21,7 +19,6 @@
     apply_speed_scale,
     apply_volume_scale,
     count_frame_per_unit,
-    mora_phoneme_list,
     pre_process,
     query_to_decoder_feature,
     raw_wave_to_output_wave,
@@ -709,190 +706,3 @@ def result_value(i: int):
                 index += 1
 
         self.assertEqual(result, true_result)
-
-    def synthesis_test_base(self, audio_query: AudioQuery):
-        # Inputs 音素長・モーラ音高の設定 & Expects 音素長・音素ID・モーラ音高の記録
-        #     Inputs
-        #         `audio_query`: 子音長0.1秒/母音長0.1秒/モーラ音高ランダム
-        #     Expects
-        #         `phoneme_length_list`: 音素長系列
-        #         `phoneme_id_list`: 音素ID系列
-        #         `f0_list`: モーラ音高系列
-        accent_phrases = audio_query.accent_phrases
-        phoneme_length_list = [0.0]
-        phoneme_id_list = [0]
-        f0_list = [0.0]
-        for accent_phrase in accent_phrases:
-            moras = accent_phrase.moras
-            for mora in moras:
-                if mora.consonant is not None:
-                    mora.consonant_length = 0.1
-                    phoneme_length_list.append(0.1)
-                    phoneme_id_list.append(OjtPhoneme(mora.consonant).phoneme_id)
-                mora.vowel_length = 0.2
-                phoneme_length_list.append(0.2)
-                phoneme_id_list.append(OjtPhoneme(mora.vowel).phoneme_id)
-                if mora.vowel not in unvoiced_mora_phoneme_list:
-                    mora.pitch = 5.0 + random()
-                f0_list.append(mora.pitch)
-            if accent_phrase.pause_mora is not None:
-                accent_phrase.pause_mora.vowel_length = 0.2
-                phoneme_length_list.append(0.2)
-                phoneme_id_list.append(OjtPhoneme("pau").phoneme_id)
-                f0_list.append(0.0)
-        phoneme_length_list.append(0.0)
-        phoneme_id_list.append(0)
-        f0_list.append(0.0)
-        phoneme_length_list[0] = audio_query.prePhonemeLength
-        phoneme_length_list[-1] = audio_query.postPhonemeLength
-
-        # Expects: speedScale適用
-        for i in range(len(phoneme_length_list)):
-            phoneme_length_list[i] /= audio_query.speedScale
-
-        # Outputs: MockCore入りTTSEngine の `.synthesis` 出力および core.decode_forward 引数
-        result = self.synthesis_engine.synthesis(query=audio_query, style_id=1)
-        decode_args = self.decode_mock.call_args[1]
-        list_length = decode_args["length"]
-
-        # Test: フレーム長
-        self.assertEqual(
-            list_length,
-            int(sum([round(p * 24000 / 256) for p in phoneme_length_list])),
-        )
-
-        # Expects: Apply/Convert/Rescale
-        num_phoneme = 45
-        # mora_phoneme_listのPhoneme ID版
-        mora_phoneme_id_list = [OjtPhoneme(p).phoneme_id for p in mora_phoneme_list]
-
-        f0 = []  # フレームごとの音高系列
-        phoneme = []  # フレームごとの音素onehotベクトル系列
-        f0_index = 0
-        mean_f0 = []
-        for i, phoneme_length in enumerate(phoneme_length_list):
-            # Expects: pitchScale適用
-            f0_single = numpy.array(f0_list[f0_index], dtype=numpy.float32) * (
-                2**audio_query.pitchScale
-            )
-            # Expects: フレームスケール化
-            for _ in range(int(round(phoneme_length * (24000 / 256)))):
-                f0.append([f0_single])
-                # Expects: 音素onehot化
-                phoneme_s = []
-                for _ in range(num_phoneme):
-                    phoneme_s.append(0)
-                # Expects: 音素フレームスケール化
-                phoneme_s[phoneme_id_list[i]] = 1
-                phoneme.append(phoneme_s)
-            # consonantとvowelを判別し、vowelであればf0_indexを一つ進める
-            if phoneme_id_list[i] in mora_phoneme_id_list:
-                if f0_single > 0:
-                    mean_f0.append(f0_single)
-                f0_index += 1
-        # Expects: 抑揚スケール適用
-        mean_f0 = numpy.array(mean_f0, dtype=numpy.float32).mean()
-        f0 = numpy.array(f0, dtype=numpy.float32)
-        for i in range(len(f0)):
-            if f0[i][0] != 0.0:
-                f0[i][0] = (f0[i][0] - mean_f0) * audio_query.intonationScale + mean_f0
-        phoneme = numpy.array(phoneme, dtype=numpy.float32)
-
-        assert_f0_count = 0
-
-        # Outputs: decode_forward `f0` 引数
-        decode_f0 = decode_args["f0"]
-
-        # Test: フレームごとの音高系列
-        # 乱数の影響で数値の位置がずれが生じるので、大半(4/5)があっていればよしとする
-        # また、上の部分のint(round(phoneme_length * (24000 / 256)))の影響で
-        # 本来のf0/phonemeとテスト生成したf0/phonemeの長さが変わることがあり、
-        # テスト生成したものが若干長くなることがあるので、本来のものの長さを基準にassertする
-        for i in range(len(decode_f0)):
-            # 乱数の影響等で数値にずれが生じるので、10の-5乗までの近似値であれば許容する
-            assert_f0_count += math.isclose(f0[i][0], decode_f0[i][0], rel_tol=10e-5)
-        self.assertTrue(assert_f0_count >= int(len(decode_f0) / 5) * 4)
-
-        assert_phoneme_count = 0
-
-        # Outputs: decode_forward `phoneme` 引数
-        decode_phoneme = decode_args["phoneme"]
-
-        # Test: フレームごとの音素系列
-        for i in range(len(decode_phoneme)):
-            assert_true_count = 0
-            for j in range(len(decode_phoneme[i])):
-                assert_true_count += bool(phoneme[i][j] == decode_phoneme[i][j])
-            assert_phoneme_count += assert_true_count == num_phoneme
-
-        self.assertTrue(assert_phoneme_count >= int(len(decode_phoneme) / 5) * 4)
-
-        # Test: スタイルID
-        self.assertEqual(decode_args["style_id"], 1)
-
-        # Expects: waveform (by mock)
-        true_result = decode_mock(list_length, num_phoneme, f0, phoneme, 1)
-        # Expects: 音量スケール適用
-        true_result *= audio_query.volumeScale
-
-        # TODO: resampyの部分は値の検証しようがないので、パスする
-        if audio_query.outputSamplingRate != 24000:
-            return
-
-        # Test:
-        assert_result_count = 0
-        for i in range(len(true_result)):
-            if audio_query.outputStereo:
-                assert_result_count += math.isclose(
-                    true_result[i], result[i][0], rel_tol=10e-5
-                ) and math.isclose(true_result[i], result[i][1], rel_tol=10e-5)
-            else:
-                assert_result_count += math.isclose(
-                    true_result[i], result[i], rel_tol=10e-5
-                )
-        self.assertTrue(assert_result_count >= int(len(true_result) / 5) * 4)
-
-    def test_synthesis(self):
-        audio_query = _gen_query(
-            deepcopy(self.accent_phrases_hello_hiho),
-            prePhonemeLength=0.1,
-            postPhonemeLength=0.1,
-        )
-
-        self.synthesis_test_base(audio_query)
-
-        # speed scaleのテスト
-        audio_query.speedScale = 1.2
-        self.synthesis_test_base(audio_query)
-
-        # pitch scaleのテスト
-        audio_query.pitchScale = 1.5
-        audio_query.speedScale = 1.0
-        self.synthesis_test_base(audio_query)
-
-        # intonation scaleのテスト
-        audio_query.pitchScale = 1.0
-        audio_query.intonationScale = 1.4
-        self.synthesis_test_base(audio_query)
-
-        # volume scaleのテスト
-        audio_query.intonationScale = 1.0
-        audio_query.volumeScale = 2.0
-        self.synthesis_test_base(audio_query)
-
-        # pre/post phoneme lengthのテスト
-        audio_query.volumeScale = 1.0
-        audio_query.prePhonemeLength = 0.5
-        audio_query.postPhonemeLength = 0.5
-        self.synthesis_test_base(audio_query)
-
-        # output sampling rateのテスト
-        audio_query.prePhonemeLength = 0.1
-        audio_query.postPhonemeLength = 0.1
-        audio_query.outputSamplingRate = 48000
-        self.synthesis_test_base(audio_query)
-
-        # output stereoのテスト
-        audio_query.outputSamplingRate = 24000
-        audio_query.outputStereo = True
-        self.synthesis_test_base(audio_query)

From 6759df6d47b3961328b56b9e4f1723554dcf025e Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Fri, 22 Dec 2023 12:02:30 +0900
Subject: [PATCH 053/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20OpenJTalk?=
 =?UTF-8?q?=E3=83=89=E3=83=A1=E3=82=A4=E3=83=B3=E3=81=AE=E9=9A=94=E9=9B=A2?=
 =?UTF-8?q?=20(#901)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Refactor: OpenJTalkドメインの隔離

* Apply suggestions from code review

---------

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 test/test_mora_to_text.py                     |  2 +-
 .../tts_pipeline/full_context_label.py        | 90 ++++++++++++++++++
 .../tts_pipeline/tts_engine_base.py           | 93 +------------------
 3 files changed, 93 insertions(+), 92 deletions(-)

diff --git a/test/test_mora_to_text.py b/test/test_mora_to_text.py
index f8f531008..279953379 100644
--- a/test/test_mora_to_text.py
+++ b/test/test_mora_to_text.py
@@ -1,7 +1,7 @@
 from unittest import TestCase
 
 # TODO: import from voicevox_engine.synthesis_engine.mora
-from voicevox_engine.tts_pipeline.tts_engine_base import mora_to_text
+from voicevox_engine.tts_pipeline.full_context_label import mora_to_text
 
 
 class TestMoraToText(TestCase):
diff --git a/voicevox_engine/tts_pipeline/full_context_label.py b/voicevox_engine/tts_pipeline/full_context_label.py
index ff843dcdc..a62adfb1e 100644
--- a/voicevox_engine/tts_pipeline/full_context_label.py
+++ b/voicevox_engine/tts_pipeline/full_context_label.py
@@ -5,6 +5,10 @@
 
 import pyopenjtalk
 
+from ..model import AccentPhrase as VvAccentPhrase  # NOTE: 後にOjtコンテナクラスをリネーム予定
+from ..model import Mora as VvMora
+from .mora_list import openjtalk_mora2text
+
 
 @dataclass
 class Label:
@@ -424,3 +428,89 @@ def extract_full_context_label(text: str):
     labels = [Label.from_feature(feature) for feature in features]
     utterance = Utterance.from_labels(labels)
     return utterance
+
+
+def mora_to_text(mora: str) -> str:
+    """
+    Parameters
+    ----------
+    mora : str
+        モーラ音素文字列
+    Returns
+    -------
+    text : str
+        モーラに対応するテキスト（対応表に無い場合は無声化母音を小文字化した音素文字列をそのまま返す）
+    """
+    if mora[-1:] in ["A", "I", "U", "E", "O"]:
+        # 無声化母音を小文字に
+        mora = mora[:-1] + mora[-1].lower()
+    if mora in openjtalk_mora2text:
+        return openjtalk_mora2text[mora]
+    else:
+        return mora
+
+
+def full_context_label_moras_to_moras(full_context_moras: list[Mora]) -> list[VvMora]:
+    """
+    Moraクラスのキャスト (`Mora` -> `model.Mora`)
+    Parameters
+    ----------
+    full_context_moras : List[Mora]
+        モーラ系列
+    Returns
+    -------
+    moras : list[VvMora]
+        モーラ系列。音素長・モーラ音高は 0 初期化
+    """
+    return [
+        VvMora(
+            text=mora_to_text("".join([p.phoneme for p in mora.phonemes])),
+            consonant=(mora.consonant.phoneme if mora.consonant is not None else None),
+            consonant_length=0 if mora.consonant is not None else None,
+            vowel=mora.vowel.phoneme,
+            vowel_length=0,
+            pitch=0,
+        )
+        for mora in full_context_moras
+    ]
+
+
+def utterance_to_accent_phrases(utterance: Utterance) -> list[VvAccentPhrase]:
+    """Utteranceインスタンスをアクセント句系列へドメイン変換する"""
+    return [
+        VvAccentPhrase(
+            moras=full_context_label_moras_to_moras(accent_phrase.moras),
+            accent=accent_phrase.accent,
+            pause_mora=(
+                VvMora(
+                    text="、",
+                    consonant=None,
+                    consonant_length=None,
+                    vowel="pau",
+                    vowel_length=0,
+                    pitch=0,
+                )
+                if (
+                    i_accent_phrase == len(breath_group.accent_phrases) - 1
+                    and i_breath_group != len(utterance.breath_groups) - 1
+                )
+                else None
+            ),
+            is_interrogative=accent_phrase.is_interrogative,
+        )
+        for i_breath_group, breath_group in enumerate(utterance.breath_groups)
+        for i_accent_phrase, accent_phrase in enumerate(breath_group.accent_phrases)
+    ]
+
+
+def text_to_accent_phrases(text: str) -> list[VvAccentPhrase]:
+    """日本語テキストからアクセント句系列を生成"""
+    if len(text.strip()) == 0:
+        return []
+
+    # 音素とアクセントの推定
+    utterance = extract_full_context_label(text)
+    if len(utterance.breath_groups) == 0:
+        return []
+
+    return utterance_to_accent_phrases(utterance)
diff --git a/voicevox_engine/tts_pipeline/tts_engine_base.py b/voicevox_engine/tts_pipeline/tts_engine_base.py
index c7c5f21ff..4357be6d0 100644
--- a/voicevox_engine/tts_pipeline/tts_engine_base.py
+++ b/voicevox_engine/tts_pipeline/tts_engine_base.py
@@ -5,8 +5,7 @@
 import numpy as np
 
 from ..model import AccentPhrase, AudioQuery, Mora
-from . import full_context_label
-from .full_context_label import Utterance, extract_full_context_label
+from .full_context_label import text_to_accent_phrases
 from .mora_list import openjtalk_mora2text
 
 # 疑問文語尾定数
@@ -15,26 +14,6 @@
 UPSPEAK_PITCH_MAX = 6.5
 
 
-def mora_to_text(mora: str) -> str:
-    """
-    Parameters
-    ----------
-    mora : str
-        モーラ音素文字列
-    Returns
-    -------
-    mora : str
-        モーラ音素文字列
-    """
-    if mora[-1:] in ["A", "I", "U", "E", "O"]:
-        # 無声化母音を小文字に
-        mora = mora[:-1] + mora[-1].lower()
-    if mora in openjtalk_mora2text:
-        return openjtalk_mora2text[mora]
-    else:
-        return mora
-
-
 def apply_interrogative_upspeak(
     accent_phrases: list[AccentPhrase], enable_interrogative_upspeak: bool
 ) -> list[AccentPhrase]:
@@ -62,74 +41,6 @@ def apply_interrogative_upspeak(
     return accent_phrases
 
 
-def full_context_label_moras_to_moras(
-    full_context_moras: List[full_context_label.Mora],
-) -> List[Mora]:
-    """
-    Moraクラスのキャスト (`full_context_label.Mora` -> `Mora`)
-    Parameters
-    ----------
-    full_context_moras : List[full_context_label.Mora]
-        モーラ系列
-    Returns
-    -------
-    moras : List[Mora]
-        モーラ系列。音素長・モーラ音高は 0 初期化
-    """
-    return [
-        Mora(
-            text=mora_to_text("".join([p.phoneme for p in mora.phonemes])),
-            consonant=(mora.consonant.phoneme if mora.consonant is not None else None),
-            consonant_length=0 if mora.consonant is not None else None,
-            vowel=mora.vowel.phoneme,
-            vowel_length=0,
-            pitch=0,
-        )
-        for mora in full_context_moras
-    ]
-
-
-def utterance_to_accent_phrases(utterance: Utterance) -> list[AccentPhrase]:
-    """Utteranceインスタンスをアクセント句系列へドメイン変換する"""
-    return [
-        AccentPhrase(
-            moras=full_context_label_moras_to_moras(accent_phrase.moras),
-            accent=accent_phrase.accent,
-            pause_mora=(
-                Mora(
-                    text="、",
-                    consonant=None,
-                    consonant_length=None,
-                    vowel="pau",
-                    vowel_length=0,
-                    pitch=0,
-                )
-                if (
-                    i_accent_phrase == len(breath_group.accent_phrases) - 1
-                    and i_breath_group != len(utterance.breath_groups) - 1
-                )
-                else None
-            ),
-            is_interrogative=accent_phrase.is_interrogative,
-        )
-        for i_breath_group, breath_group in enumerate(utterance.breath_groups)
-        for i_accent_phrase, accent_phrase in enumerate(breath_group.accent_phrases)
-    ]
-
-
-def test_to_accent_phrases(text: str) -> list[AccentPhrase]:
-    """日本語テキストからアクセント句系列を生成"""
-    if len(text.strip()) == 0:
-        return []
-
-    # 音素とアクセントの推定
-    utterance = extract_full_context_label(text)
-    if len(utterance.breath_groups) == 0:
-        return []
-
-    return utterance_to_accent_phrases(utterance)
-
-
 class TTSEngineBase(metaclass=ABCMeta):
     @property
     @abstractmethod
@@ -260,7 +171,7 @@ def create_accent_phrases(self, text: str, style_id: int) -> List[AccentPhrase]:
             アクセント句系列
         """
         # 音素とアクセントの推定
-        accent_phrases = test_to_accent_phrases(text)
+        accent_phrases = text_to_accent_phrases(text)
 
         # 音素長・モーラ音高の推定と更新
         accent_phrases = self.replace_mora_data(

From f251dfed15385cdd5f7b17b52b3d8f502989abe3 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Fri, 22 Dec 2023 12:14:46 +0900
Subject: [PATCH 054/177] =?UTF-8?q?=E4=BF=AE=E6=AD=A3:=20=E5=AE=9A?=
 =?UTF-8?q?=E7=BE=A9=E7=84=A1=E3=81=97Mora=E5=BC=95=E6=95=B0=E3=81=AE?=
 =?UTF-8?q?=E5=89=8A=E9=99=A4=20(#914)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fix: 定義無しMora引数の削除
---
 voicevox_engine/tts_pipeline/kana_parser.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/voicevox_engine/tts_pipeline/kana_parser.py b/voicevox_engine/tts_pipeline/kana_parser.py
index 87a4624a8..41b32575a 100644
--- a/voicevox_engine/tts_pipeline/kana_parser.py
+++ b/voicevox_engine/tts_pipeline/kana_parser.py
@@ -28,7 +28,7 @@
 _PAUSE_DELIMITER = "、"  # ポーズ有りアクセント句境界
 _WIDE_INTERROGATION_MARK = "？"  # 疑問形
 
-# AquesTalk風記法とモーラの対応（音素長・音高 0 初期化、疑問形 off 初期化）
+# AquesTalk風記法とモーラの対応（音素長・音高 0 初期化）
 _text2mora_with_unvoice = {}
 for text, (consonant, vowel) in openjtalk_text2mora.items():
     _text2mora_with_unvoice[text] = Mora(
@@ -38,7 +38,6 @@
         vowel=vowel,
         vowel_length=0,
         pitch=0,
-        is_interrogative=False,
     )
     if vowel in ["a", "i", "u", "e", "o"]:
         # 「`_` で無声化」の実装
@@ -50,7 +49,6 @@
             vowel=vowel.upper(),
             vowel_length=0,
             pitch=0,
-            is_interrogative=False,
         )
 
 

From 3935862eec00059ea8e5f87455e2fa388e383521 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Fri, 22 Dec 2023 12:20:33 +0900
Subject: [PATCH 055/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20API=E3=83=89?=
 =?UTF-8?q?=E3=82=AD=E3=83=A5=E3=83=A1=E3=83=B3=E3=83=88=E7=94=9F=E6=88=90?=
 =?UTF-8?q?=E3=82=B3=E3=83=BC=E3=83=89=E3=81=AE=E6=AE=B5=E9=9A=8E=E5=8C=96?=
 =?UTF-8?q?=20(#865)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Refactor: APIドキュメント生成コードの段階化

* Fix: lint

* Refactor: `Path` ベースファイル保存への変更

* Fix: lint
---
 .github/workflows/upload-gh-pages.yml |  2 ++
 build_util/make_docs.py               | 48 ++++++++++++++++++---------
 2 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/upload-gh-pages.yml b/.github/workflows/upload-gh-pages.yml
index e545c0bf6..3efc702b4 100644
--- a/.github/workflows/upload-gh-pages.yml
+++ b/.github/workflows/upload-gh-pages.yml
@@ -1,3 +1,5 @@
+# API docs HTML ファイルを生成し、`gh-pages` ブランチへの push によって GitHub Pages 上のドキュメントとして公開
+
 name: upload-docs
 
 on:
diff --git a/build_util/make_docs.py b/build_util/make_docs.py
index 7bf1f0b6e..ad7135e5d 100644
--- a/build_util/make_docs.py
+++ b/build_util/make_docs.py
@@ -1,4 +1,5 @@
 import json
+from pathlib import Path
 
 from voicevox_engine.dev.core import mock as core
 from voicevox_engine.dev.synthesis_engine.mock import MockTTSEngine
@@ -6,20 +7,12 @@
 from voicevox_engine.setting import USER_SETTING_PATH, SettingLoader
 from voicevox_engine.utility import engine_root
 
-if __name__ == "__main__":
-    import run
 
-    app = run.generate_app(
-        synthesis_engines={"mock": MockTTSEngine(speakers=core.metas())},
-        latest_core_version="mock",
-        setting_loader=SettingLoader(USER_SETTING_PATH),
-        preset_manager=PresetManager(  # FIXME: impl MockPresetManager
-            preset_path=engine_root() / "presets.yaml",
-        ),
-    )
-    with open("docs/api/index.html", "w") as f:
-        f.write(
-            """<!DOCTYPE html>
+def generate_api_docs_html(schema: str) -> str:
+    """OpenAPI schema から API ドキュメント HTML を生成する"""
+
+    return (
+        """<!DOCTYPE html>
 <html lang="ja">
 <head>
     <title>voicevox_engine API Document</title>
@@ -34,5 +27,30 @@
     </script>
 </body>
 </html>"""
-            % json.dumps(app.openapi())
-        )
+        % schema
+    )
+
+
+if __name__ == "__main__":
+
+    import run
+
+    # FastAPI の機能を用いて OpenAPI schema を生成する
+    app = run.generate_app(
+        synthesis_engines={"mock": MockTTSEngine(speakers=core.metas())},
+        latest_core_version="mock",
+        setting_loader=SettingLoader(USER_SETTING_PATH),
+        preset_manager=PresetManager(  # FIXME: impl MockPresetManager
+            preset_path=engine_root() / "presets.yaml",
+        ),
+    )
+    api_schema = json.dumps(app.openapi())
+
+    # API ドキュメント HTML を生成する
+    api_docs_html = generate_api_docs_html(api_schema)
+
+    # HTML ファイルとして保存する
+    api_docs_root = Path("docs/api")  # 'upload-docs' workflow の対象
+    output_path = api_docs_root / "index.html"
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    output_path.write_text(api_docs_html)

From 6e239fea0f62b558bd3f36ef2bc19f45fed663b9 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Fri, 22 Dec 2023 12:22:37 +0900
Subject: [PATCH 056/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20CDLL=E5=9E=8B?=
 =?UTF-8?q?=E4=BB=98=E3=81=91=E3=81=AE=E5=88=87=E3=82=8A=E5=87=BA=E3=81=97?=
 =?UTF-8?q?=20(#862)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Refactor: CDLL型付けの切り出し
---
 voicevox_engine/core_wrapper.py | 78 ++++++++++++++++++++-------------
 1 file changed, 48 insertions(+), 30 deletions(-)

diff --git a/voicevox_engine/core_wrapper.py b/voicevox_engine/core_wrapper.py
index dcb2da101..f7f697ffe 100644
--- a/voicevox_engine/core_wrapper.py
+++ b/voicevox_engine/core_wrapper.py
@@ -378,14 +378,18 @@ def load_core(core_dir: Path, use_gpu: bool) -> CDLL:
         raise RuntimeError(f"このコンピュータのアーキテクチャ {platform.machine()} で利用可能なコアがありません")
 
 
+def _type_initialize(core_cdll: CDLL) -> None:
+    """コアDLL `initialize` 関数を型付けする"""
+    core_cdll.initialize.restype = c_bool
+
+
+def _type_metas(core_cdll: CDLL) -> None:
+    """コアDLL `metas` 関数を型付けする"""
+    core_cdll.metas.restype = c_char_p
+
+
 def _type_yukarin_s_forward(core_cdll: CDLL) -> None:
-    """
-    コアDLL `yukarin_s_forward` 関数の型付け
-    Parameters
-    ----------
-    core_cdll : CDLL
-        コアDLL
-    """
+    """コアDLL `yukarin_s_forward` 関数を型付けする"""
     core_cdll.yukarin_s_forward.argtypes = (
         c_int,
         POINTER(c_long),
@@ -396,13 +400,7 @@ def _type_yukarin_s_forward(core_cdll: CDLL) -> None:
 
 
 def _type_yukarin_sa_forward(core_cdll: CDLL) -> None:
-    """
-    コアDLL `yukarin_sa_forward` 関数の型付け
-    Parameters
-    ----------
-    core_cdll : CDLL
-        コアDLL
-    """
+    """コアDLL `yukarin_sa_forward` 関数を型付けする"""
     core_cdll.yukarin_sa_forward.argtypes = (
         c_int,
         POINTER(c_long),
@@ -418,13 +416,7 @@ def _type_yukarin_sa_forward(core_cdll: CDLL) -> None:
 
 
 def _type_decode_forward(core_cdll: CDLL) -> None:
-    """
-    コアDLL `decode_forward` 関数の型付け
-    Parameters
-    ----------
-    core_cdll : CDLL
-        コアDLL
-    """
+    """コアDLL `decode_forward` 関数を型付けする"""
     core_cdll.decode_forward.argtypes = (
         c_int,
         c_int,
@@ -436,6 +428,33 @@ def _type_decode_forward(core_cdll: CDLL) -> None:
     core_cdll.decode_forward.restype = c_bool
 
 
+def _type_last_error_message(core_cdll: CDLL) -> None:
+    """コアDLL `last_error_message` 関数を型付けする"""
+    core_cdll.last_error_message.restype = c_char_p
+
+
+def _type_load_model(core_cdll: CDLL) -> None:
+    """コアDLL `load_model` 関数を型付けする"""
+    core_cdll.load_model.argtypes = (c_long,)
+    core_cdll.load_model.restype = c_bool
+
+
+def _type_is_model_loaded(core_cdll: CDLL) -> None:
+    """コアDLL `is_model_loaded` 関数を型付けする"""
+    core_cdll.is_model_loaded.argtypes = (c_long,)
+    core_cdll.is_model_loaded.restype = c_bool
+
+
+def _type_supported_devices(core_cdll: CDLL) -> None:
+    """コアDLL `supported_devices` 関数を型付けする"""
+    core_cdll.supported_devices.restype = c_char_p
+
+
+def _type_finalize(core_cdll: CDLL) -> None:
+    """コアDLL `finalize` 関数を型付けする"""
+    core_cdll.finalize.restype = None
+
+
 class CoreWrapper:
     def __init__(
         self,
@@ -449,12 +468,13 @@ def __init__(
 
         self.core = load_core(core_dir, use_gpu)
 
-        self.core.initialize.restype = c_bool
-        self.core.metas.restype = c_char_p
+        _type_initialize(self.core)
+
+        _type_metas(self.core)
         _type_yukarin_s_forward(self.core)
         _type_yukarin_sa_forward(self.core)
         _type_decode_forward(self.core)
-        self.core.last_error_message.restype = c_char_p
+        _type_last_error_message(self.core)
 
         self.exist_supported_devices = False
         self.exist_finalize = False
@@ -469,17 +489,15 @@ def __init__(
             model_type = "onnxruntime"
             self.exist_load_model = True
             self.exist_is_model_loaded = True
-            self.core.load_model.argtypes = (c_long,)
-            self.core.load_model.restype = c_bool
-            self.core.is_model_loaded.argtypes = (c_long,)
-            self.core.is_model_loaded.restype = c_bool
+            _type_load_model(self.core)
+            _type_is_model_loaded(self.core)
         else:
             model_type = _check_core_type(core_dir)
         assert model_type is not None
 
         if model_type == "onnxruntime":
-            self.core.supported_devices.restype = c_char_p
-            self.core.finalize.restype = None
+            _type_supported_devices(self.core)
+            _type_finalize(self.core)
             self.exist_supported_devices = True
             self.exist_finalize = True
             exist_cpu_num_threads = True

From 68a439d979a0b9186bfa9009dece3309ad956696 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Fri, 22 Dec 2023 12:58:11 +0900
Subject: [PATCH 057/177] =?UTF-8?q?=E5=89=8A=E9=99=A4:=20`Utterance.phonem?=
 =?UTF-8?q?es`=20=E5=86=85=E3=82=B3=E3=83=B3=E3=83=86=E3=82=AD=E3=82=B9?=
 =?UTF-8?q?=E3=83=88=E6=9B=B4=E6=96=B0=20(#905)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Remove: `Utterance.phonemes` 内コンテキスト更新

* Remove: Utterance.phonemes 内コンテキスト更新テスト
---
 test/test_full_context_label.py               | 69 -------------------
 .../tts_pipeline/full_context_label.py        | 62 -----------------
 2 files changed, 131 deletions(-)

diff --git a/test/test_full_context_label.py b/test/test_full_context_label.py
index 3456c45df..621769e20 100644
--- a/test/test_full_context_label.py
+++ b/test/test_full_context_label.py
@@ -1,5 +1,4 @@
 from copy import deepcopy
-from itertools import chain
 from unittest import TestCase
 
 from voicevox_engine.tts_pipeline.full_context_label import (
@@ -341,73 +340,5 @@ def test_phonemes(self):
         expects_hello_hiho = "sil k o N n i ch i w a pau h i h o d e s U sil"
         self.assertEqual(outputs_hello_hiho, expects_hello_hiho)
 
-        changed_utterance = Utterance.from_labels(self.utterance_hello_hiho.phonemes)
-        self.assertEqual(len(changed_utterance.breath_groups), 2)
-        accent_phrases = list(
-            chain.from_iterable(
-                breath_group.accent_phrases
-                for breath_group in changed_utterance.breath_groups
-            )
-        )
-        for prev, cent, post in zip(
-            [None] + accent_phrases[:-1],
-            accent_phrases,
-            accent_phrases[1:] + [None],
-        ):
-            mora_num = len(cent.moras)
-            accent = cent.accent
-
-            if prev is not None:
-                for phoneme in prev.phonemes:
-                    self.assertEqual(phoneme.contexts["g1"], str(mora_num))
-                    self.assertEqual(phoneme.contexts["g2"], str(accent))
-
-            if post is not None:
-                for phoneme in post.phonemes:
-                    self.assertEqual(phoneme.contexts["e1"], str(mora_num))
-                    self.assertEqual(phoneme.contexts["e2"], str(accent))
-
-            for phoneme in cent.phonemes:
-                self.assertEqual(
-                    phoneme.contexts["k2"],
-                    str(
-                        sum(
-                            [
-                                len(breath_group.accent_phrases)
-                                for breath_group in changed_utterance.breath_groups
-                            ]
-                        )
-                    ),
-                )
-
-        for prev, cent, post in zip(
-            [None] + changed_utterance.breath_groups[:-1],
-            changed_utterance.breath_groups,
-            changed_utterance.breath_groups[1:] + [None],
-        ):
-            accent_phrase_num = len(cent.accent_phrases)
-
-            if prev is not None:
-                for phoneme in prev.phonemes:
-                    self.assertEqual(phoneme.contexts["j1"], str(accent_phrase_num))
-
-            if post is not None:
-                for phoneme in post.phonemes:
-                    self.assertEqual(phoneme.contexts["h1"], str(accent_phrase_num))
-
-            for phoneme in cent.phonemes:
-                self.assertEqual(phoneme.contexts["i1"], str(accent_phrase_num))
-                self.assertEqual(
-                    phoneme.contexts["i5"],
-                    str(accent_phrases.index(cent.accent_phrases[0]) + 1),
-                )
-                self.assertEqual(
-                    phoneme.contexts["i6"],
-                    str(
-                        len(accent_phrases)
-                        - accent_phrases.index(cent.accent_phrases[0])
-                    ),
-                )
-
     def test_labels(self):
         self.assertEqual(features(self.utterance_hello_hiho), self.test_case_hello_hiho)
diff --git a/voicevox_engine/tts_pipeline/full_context_label.py b/voicevox_engine/tts_pipeline/full_context_label.py
index a62adfb1e..966337b89 100644
--- a/voicevox_engine/tts_pipeline/full_context_label.py
+++ b/voicevox_engine/tts_pipeline/full_context_label.py
@@ -339,68 +339,6 @@ def phonemes(self):
         labels : list[Label]
             Utteranceクラスに直接的・間接的に含まれる、全てのLabelを返す
         """
-        accent_phrases = list(
-            chain.from_iterable(
-                breath_group.accent_phrases for breath_group in self.breath_groups
-            )
-        )
-        for prev, cent, post in zip(
-            [None] + accent_phrases[:-1],
-            accent_phrases,
-            accent_phrases[1:] + [None],
-        ):
-            mora_num = len(cent.moras)
-            accent = cent.accent
-
-            if prev is not None:
-                prev.set_context("g1", str(mora_num))
-                prev.set_context("g2", str(accent))
-
-            if post is not None:
-                post.set_context("e1", str(mora_num))
-                post.set_context("e2", str(accent))
-
-            cent.set_context("f1", str(mora_num))
-            cent.set_context("f2", str(accent))
-            for i_mora, mora in enumerate(cent.moras):
-                mora.set_context("a1", str(i_mora - accent + 1))
-                mora.set_context("a2", str(i_mora + 1))
-                mora.set_context("a3", str(mora_num - i_mora))
-
-        for prev, cent, post in zip(
-            [None] + self.breath_groups[:-1],
-            self.breath_groups,
-            self.breath_groups[1:] + [None],
-        ):
-            accent_phrase_num = len(cent.accent_phrases)
-
-            if prev is not None:
-                prev.set_context("j1", str(accent_phrase_num))
-
-            if post is not None:
-                post.set_context("h1", str(accent_phrase_num))
-
-            cent.set_context("i1", str(accent_phrase_num))
-            cent.set_context(
-                "i5", str(accent_phrases.index(cent.accent_phrases[0]) + 1)
-            )
-            cent.set_context(
-                "i6",
-                str(len(accent_phrases) - accent_phrases.index(cent.accent_phrases[0])),
-            )
-
-        self.set_context(
-            "k2",
-            str(
-                sum(
-                    [
-                        len(breath_group.accent_phrases)
-                        for breath_group in self.breath_groups
-                    ]
-                )
-            ),
-        )
-
         labels: list[Label] = []
         for i in range(len(self.pauses)):
             if self.pauses[i] is not None:

From 0cf24277469c63741a02ef8ac3392243fabbe7c4 Mon Sep 17 00:00:00 2001
From: sabonerune <102559104+sabonerune@users.noreply.github.com>
Date: Sat, 23 Dec 2023 01:02:34 +0900
Subject: [PATCH 058/177] =?UTF-8?q?FIX:=20=E3=83=97=E3=83=AD=E3=83=91?=
 =?UTF-8?q?=E3=83=86=E3=82=A3=E5=90=8D=E3=81=AE=E3=83=9F=E3=82=B9=E3=82=92?=
 =?UTF-8?q?=E4=BF=AE=E6=AD=A3=20(#922)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 voicevox_engine/metas/MetasStore.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/voicevox_engine/metas/MetasStore.py b/voicevox_engine/metas/MetasStore.py
index 76eceff8a..3eb1e4eaa 100644
--- a/voicevox_engine/metas/MetasStore.py
+++ b/voicevox_engine/metas/MetasStore.py
@@ -47,7 +47,7 @@ def load_combined_metas(self, engine: "TTSEngineBase") -> List[Speaker]:
         # エンジンに含まれる話者メタ情報との統合
         return [
             Speaker(
-                **self.self._loaded_metas[speaker_meta.speaker_uuid].dict(),
+                **self._loaded_metas[speaker_meta.speaker_uuid].dict(),
                 **speaker_meta.dict(),
             )
             for speaker_meta in core_metas

From 5600a1d661d1cb07a6cfebf63d3c3bd58e4cd390 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sat, 23 Dec 2023 02:21:55 +0900
Subject: [PATCH 059/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`CoreAdaptor`=20?=
 =?UTF-8?q?=E3=83=95=E3=82=A1=E3=82=B5=E3=83=BC=E3=83=89=E6=A9=9F=E8=83=BD?=
 =?UTF-8?q?=20(#915)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: `CoreAdaptor` ファサード機能

* ファサードじゃなさそう

* 置換ミス

---------

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 voicevox_engine/tts_pipeline/tts_engine.py | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index b92432255..56f8414db 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -300,7 +300,8 @@ def is_initialized_style_id_synthesis(self, style_id: int) -> bool:
             return True  # コアが古い場合はどうしようもないのでTrueを返す
 
     def safe_yukarin_s_forward(self, phoneme_list_s: ndarray, style_id: int) -> ndarray:
-        # TODO: `self.core.initialize_style_id_synthesis(style_id, skip_reinit=True)` のファサード的移植
+        # 「指定スタイルを初期化」「mutexによる安全性」「系列長・データ型に関するアダプター」を提供する
+        self.initialize_style_id_synthesis(style_id, skip_reinit=True)
         with self.mutex:
             phoneme_length = self.core.yukarin_s_forward(
                 length=len(phoneme_list_s),
@@ -319,7 +320,8 @@ def safe_yukarin_sa_forward(
         end_accent_phrase_list: ndarray,
         style_id: int,
     ) -> ndarray:
-        # TODO: `self.core.initialize_style_id_synthesis(style_id, skip_reinit=True)` のファサード的移植
+        # 「指定スタイルを初期化」「mutexによる安全性」「系列長・データ型に関するアダプター」を提供する
+        self.initialize_style_id_synthesis(style_id, skip_reinit=True)
         with self.mutex:
             f0_list = self.core.yukarin_sa_forward(
                 length=vowel_phoneme_list.shape[0],
@@ -336,7 +338,8 @@ def safe_yukarin_sa_forward(
     def safe_decode_forward(
         self, phoneme: ndarray, f0: ndarray, style_id: int
     ) -> tuple[ndarray, int]:
-        # TODO: `self.core.initialize_style_id_synthesis(style_id, skip_reinit=True)` のファサード的移植
+        # 「指定スタイルを初期化」「mutexによる安全性」「系列長・データ型に関するアダプター」を提供する
+        self.initialize_style_id_synthesis(style_id, skip_reinit=True)
         with self.mutex:
             wave = self.core.decode_forward(
                 length=phoneme.shape[0],
@@ -379,9 +382,6 @@ def replace_phoneme_length(
         self, accent_phrases: list[AccentPhrase], style_id: int
     ) -> list[AccentPhrase]:
         """アクセント句系列に含まれるモーラの音素長属性をスタイルに合わせて更新する"""
-        # モデルがロードされていない場合はロードする
-        self.core.initialize_style_id_synthesis(style_id, skip_reinit=True)
-
         # モーラ系列を抽出する
         moras = to_flatten_moras(accent_phrases)
 
@@ -424,8 +424,6 @@ def replace_mora_pitch(
         accent_phrases : List[AccentPhrase]
             音高(ピッチ)が設定されたアクセント句モデルのリスト
         """
-        # モデルがロードされていない場合はロードする
-        self.core.initialize_style_id_synthesis(style_id, skip_reinit=True)
         # numpy.concatenateが空リストだとエラーを返すのでチェック
         if len(accent_phrases) == 0:
             return []
@@ -562,14 +560,7 @@ def _synthesis_impl(self, query: AudioQuery, style_id: int):
         wave : numpy.ndarray
             音声合成結果
         """
-        # モデルがロードされていない場合はロードする
-        self.core.initialize_style_id_synthesis(style_id, skip_reinit=True)
-
         phoneme, f0 = query_to_decoder_feature(query)
-
-        # 今まで生成された情報をdecode_forwardにかけ、推論器によって音声波形を生成する
         raw_wave, sr_raw_wave = self.core.safe_decode_forward(phoneme, f0, style_id)
-
         wave = raw_wave_to_output_wave(query, raw_wave, sr_raw_wave)
-
         return wave

From f62ce80011d4f499699496f68a7c6cbb42d0ffeb Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sat, 23 Dec 2023 02:23:57 +0900
Subject: [PATCH 060/177] =?UTF-8?q?=E4=BF=AE=E6=AD=A3:=20`=5Fsynthesis=5Fi?=
 =?UTF-8?q?mpl()`=20=E3=81=AE=E7=9B=B4=E6=8E=A5=E3=82=B3=E3=83=BC=E3=83=AB?=
 =?UTF-8?q?=20(#916)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix: `_synthesis_impl()` の直接コール

* Apply suggestions from code review

---------

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 voicevox_engine/cancellable_engine.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/voicevox_engine/cancellable_engine.py b/voicevox_engine/cancellable_engine.py
index 140a7f138..32bf29555 100644
--- a/voicevox_engine/cancellable_engine.py
+++ b/voicevox_engine/cancellable_engine.py
@@ -240,7 +240,8 @@ def start_synthesis_subprocess(
                 # バージョンが見つからないエラー
                 sub_proc_con.send("")
                 continue
-            wave = _engine._synthesis_impl(query, style_id)
+            # FIXME: enable_interrogative_upspeakフラグをWebAPIから受け渡してくる
+            wave = _engine.synthesis(query, style_id, False)
             with NamedTemporaryFile(delete=False) as f:
                 soundfile.write(
                     file=f, data=wave, samplerate=query.outputSamplingRate, format="WAV"

From d670397f5c017bbd080ba41424aaf4b6fdc472e4 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sat, 23 Dec 2023 02:26:31 +0900
Subject: [PATCH 061/177] =?UTF-8?q?=E5=BB=83=E6=AD=A2:=20=E3=83=95?=
 =?UTF-8?q?=E3=83=AB=E3=82=B3=E3=83=B3=E3=83=86=E3=82=AD=E3=82=B9=E3=83=88?=
 =?UTF-8?q?=E3=83=A9=E3=83=99=E3=83=AB=E7=B3=BB=20`set=5Fcontext()`=20(#91?=
 =?UTF-8?q?7)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

remove: `set_context` 廃止
---
 test/test_full_context_label.py               | 21 --------
 .../tts_pipeline/full_context_label.py        | 54 -------------------
 2 files changed, 75 deletions(-)

diff --git a/test/test_full_context_label.py b/test/test_full_context_label.py
index 621769e20..a162855ed 100644
--- a/test/test_full_context_label.py
+++ b/test/test_full_context_label.py
@@ -1,4 +1,3 @@
-from copy import deepcopy
 from unittest import TestCase
 
 from voicevox_engine.tts_pipeline.full_context_label import (
@@ -253,13 +252,6 @@ def test_labels(self) -> None:
         self.assert_labels(self.mora_hiho_3, 15, 17)
         self.assert_labels(self.mora_hiho_4, 17, 19)
 
-    def test_set_context(self):
-        # 値を書き換えるので、他のテストに影響を出さないためにdeepcopyする
-        mora_hello_1 = deepcopy(self.mora_hello_1)
-        # phonemeにあたる"p3"を書き換える
-        mora_hello_1.set_context("p3", "a")
-        self.assertEqual(jointed_phonemes(mora_hello_1), "aa")
-
 
 class TestAccentPhrase(TestBasePhonemes):
     def setUp(self) -> None:
@@ -277,12 +269,6 @@ def test_accent(self):
         self.assertEqual(self.accent_phrase_hello.accent, 5)
         self.assertEqual(self.accent_phrase_hiho.accent, 1)
 
-    def test_set_context(self):
-        accent_phrase_hello = deepcopy(self.accent_phrase_hello)
-        # phonemeにあたる"p3"を書き換える
-        accent_phrase_hello.set_context("p3", "a")
-        self.assertEqual(jointed_phonemes(accent_phrase_hello), "aaaaaaaaa")
-
     def test_phonemes(self):
         outputs_hello = space_jointed_phonemes(self.accent_phrase_hello)
         outputs_hiho = space_jointed_phonemes(self.accent_phrase_hiho)
@@ -308,13 +294,6 @@ def setUp(self) -> None:
             self.phonemes_hello_hiho[11:19]
         )
 
-    def test_set_context(self):
-        # 値を書き換えるので、他のテストに影響を出さないためにdeepcopyする
-        breath_group_hello = deepcopy(self.breath_group_hello)
-        # phonemeにあたる"p3"を書き換える
-        breath_group_hello.set_context("p3", "a")
-        self.assertEqual(jointed_phonemes(breath_group_hello), "aaaaaaaaa")
-
     def test_phonemes(self):
         outputs_hello = space_jointed_phonemes(self.breath_group_hello)
         outputs_hiho = space_jointed_phonemes(self.breath_group_hiho)
diff --git a/voicevox_engine/tts_pipeline/full_context_label.py b/voicevox_engine/tts_pipeline/full_context_label.py
index 966337b89..3ed92faed 100644
--- a/voicevox_engine/tts_pipeline/full_context_label.py
+++ b/voicevox_engine/tts_pipeline/full_context_label.py
@@ -74,21 +74,6 @@ class Mora:
     consonant: Label | None
     vowel: Label
 
-    def set_context(self, key: str, value: str):
-        """
-        Moraクラス内に含まれるLabelのcontextのうち、指定されたキーの値を変更する
-        consonantが存在する場合は、vowelと同じようにcontextを変更する
-        Parameters
-        ----------
-        key : str
-            変更したいcontextのキー
-        value : str
-            変更したいcontextの値
-        """
-        self.vowel.contexts[key] = value
-        if self.consonant is not None:
-            self.consonant.contexts[key] = value
-
     @property
     def phonemes(self):
         """このモーラを構成するラベルリスト。母音ラベルのみの場合は [母音ラベル,]、子音ラベルもある場合は [子音ラベル, 母音ラベル]。
@@ -168,19 +153,6 @@ def from_labels(cls, labels: list[Label]) -> Self:
 
         return accent_phrase
 
-    def set_context(self, key: str, value: str):
-        """
-        AccentPhraseに間接的に含まれる全てのLabelのcontextの、指定されたキーの値を変更する
-        Parameters
-        ----------
-        key : str
-            変更したいcontextのキー
-        value : str
-            変更したいcontextの値
-        """
-        for mora in self.moras:
-            mora.set_context(key, value)
-
     @property
     def phonemes(self):
         """
@@ -239,19 +211,6 @@ def from_labels(cls, labels: list[Label]) -> Self:
 
         return breath_group
 
-    def set_context(self, key: str, value: str):
-        """
-        BreathGroupに間接的に含まれる全てのLabelのcontextの、指定されたキーの値を変更する
-        Parameters
-        ----------
-        key : str
-            変更したいcontextのキー
-        value : str
-            変更したいcontextの値
-        """
-        for accent_phrase in self.accent_phrases:
-            accent_phrase.set_context(key, value)
-
     @property
     def phonemes(self):
         """
@@ -316,19 +275,6 @@ def from_labels(cls, labels: list[Label]) -> Self:
 
         return utterance
 
-    def set_context(self, key: str, value: str):
-        """
-        Utteranceに間接的に含まれる全てのLabelのcontextの、指定されたキーの値を変更する
-        Parameters
-        ----------
-        key : str
-            変更したいcontextのキー
-        value : str
-            変更したいcontextの値
-        """
-        for breath_group in self.breath_groups:
-            breath_group.set_context(key, value)
-
     @property
     def phonemes(self):
         """

From f0dad1c225bd968d6b59404dbde6eb837fd91773 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sat, 23 Dec 2023 02:29:36 +0900
Subject: [PATCH 062/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`.phonemes`=20->?=
 =?UTF-8?q?=20`.labels`=20=E3=83=AA=E3=83=8D=E3=83=BC=E3=83=A0=20(#918)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

refactor: `.phonemes` -> `.labels` リネーム
---
 test/test_full_context_label.py               | 10 ++--
 .../tts_pipeline/full_context_label.py        | 47 +++++--------------
 2 files changed, 15 insertions(+), 42 deletions(-)

diff --git a/test/test_full_context_label.py b/test/test_full_context_label.py
index a162855ed..ad951d0fd 100644
--- a/test/test_full_context_label.py
+++ b/test/test_full_context_label.py
@@ -33,7 +33,7 @@ def contexts_to_feature(contexts: dict[str, str]) -> str:
 
 def features(ojt_container: OjtContainer):
     """コンテナインスタンスに直接的・間接的に含まれる全ての feature を返す"""
-    return [contexts_to_feature(p.contexts) for p in ojt_container.phonemes]
+    return [contexts_to_feature(p.contexts) for p in ojt_container.labels]
 
 
 class TestBasePhonemes(TestCase):
@@ -131,16 +131,12 @@ def setUp(self):
 
 def jointed_phonemes(ojt_container: OjtContainer) -> str:
     """コンテナインスタンスに直接的・間接的に含まれる全ラベルの音素文字を結合してを返す"""
-    return "".join([label.phoneme for label in ojt_container.phonemes])
-    # NOTE: `.phonemes` は `.labels` にリネーム予定
-    # return "".join([label.phoneme for label in ojt_container.labels])
+    return "".join([label.phoneme for label in ojt_container.labels])
 
 
 def space_jointed_phonemes(ojt_container: OjtContainer) -> str:
     """コンテナインスタンスに直接的・間接的に含まれる全ラベルの音素文字を ` ` 挟みながら結合してを返す"""
-    return " ".join([label.phoneme for label in ojt_container.phonemes])
-    # NOTE: `.phonemes` は `.labels` にリネーム予定
-    # return " ".join([label.phoneme for label in ojt_container.labels])
+    return " ".join([label.phoneme for label in ojt_container.labels])
 
 
 class TestPhoneme(TestBasePhonemes):
diff --git a/voicevox_engine/tts_pipeline/full_context_label.py b/voicevox_engine/tts_pipeline/full_context_label.py
index 3ed92faed..e9d14a5da 100644
--- a/voicevox_engine/tts_pipeline/full_context_label.py
+++ b/voicevox_engine/tts_pipeline/full_context_label.py
@@ -75,10 +75,8 @@ class Mora:
     vowel: Label
 
     @property
-    def phonemes(self):
-        """このモーラを構成するラベルリスト。母音ラベルのみの場合は [母音ラベル,]、子音ラベルもある場合は [子音ラベル, 母音ラベル]。
-        NOTE: `.labels` に名称変更予定
-        """
+    def labels(self) -> list[Label]:
+        """このモーラを構成するラベルリスト。母音ラベルのみの場合は [母音ラベル,]、子音ラベルもある場合は [子音ラベル, 母音ラベル]。"""
         if self.consonant is not None:
             return [self.consonant, self.vowel]
         else:
@@ -154,16 +152,9 @@ def from_labels(cls, labels: list[Label]) -> Self:
         return accent_phrase
 
     @property
-    def phonemes(self):
-        """
-        内包する全てのラベルを返す
-        NOTE: `.labels` に名称変更予定
-        Returns
-        -------
-        labels : list[Label]
-            AccentPhraseに間接的に含まれる全てのLabelを返す
-        """
-        return list(chain.from_iterable(m.phonemes for m in self.moras))
+    def labels(self) -> list[Label]:
+        """内包する全てのラベルを返す"""
+        return list(chain.from_iterable(m.labels for m in self.moras))
 
 
 @dataclass
@@ -212,18 +203,11 @@ def from_labels(cls, labels: list[Label]) -> Self:
         return breath_group
 
     @property
-    def phonemes(self):
-        """
-        内包する全てのラベルを返す
-        NOTE: `.labels` に名称変更予定
-        Returns
-        -------
-        labels : list[Label]
-            BreathGroupに間接的に含まれる全てのLabelを返す
-        """
+    def labels(self) -> list[Label]:
+        """内包する全てのラベルを返す"""
         return list(
             chain.from_iterable(
-                accent_phrase.phonemes for accent_phrase in self.accent_phrases
+                accent_phrase.labels for accent_phrase in self.accent_phrases
             )
         )
 
@@ -276,22 +260,15 @@ def from_labels(cls, labels: list[Label]) -> Self:
         return utterance
 
     @property
-    def phonemes(self):
-        """
-        内包する全てのラベルを返す
-        NOTE: `.labels` に名称変更予定
-        Returns
-        -------
-        labels : list[Label]
-            Utteranceクラスに直接的・間接的に含まれる、全てのLabelを返す
-        """
+    def labels(self) -> list[Label]:
+        """内包する全てのラベルを返す"""
         labels: list[Label] = []
         for i in range(len(self.pauses)):
             if self.pauses[i] is not None:
                 labels += [self.pauses[i]]
 
             if i < len(self.pauses) - 1:
-                labels += self.breath_groups[i].phonemes
+                labels += self.breath_groups[i].labels
 
         return labels
 
@@ -348,7 +325,7 @@ def full_context_label_moras_to_moras(full_context_moras: list[Mora]) -> list[Vv
     """
     return [
         VvMora(
-            text=mora_to_text("".join([p.phoneme for p in mora.phonemes])),
+            text=mora_to_text("".join([p.phoneme for p in mora.labels])),
             consonant=(mora.consonant.phoneme if mora.consonant is not None else None),
             consonant_length=0 if mora.consonant is not None else None,
             vowel=mora.vowel.phoneme,

From 9afbe25e0d97a788b88e9bc3c68569ce41b7f625 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sat, 23 Dec 2023 02:48:26 +0900
Subject: [PATCH 063/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`OjtPhoneme`=20?=
 =?UTF-8?q?=E2=86=92=20`Phoneme`=20=E3=83=AA=E3=83=8D=E3=83=BC=E3=83=A0=20?=
 =?UTF-8?q?(#920)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: `OjtPhoneme` → `Phoneme` リネーム

* fix: lint
---
 test/test_acoustic_feature_extractor.py       | 20 +++---
 test/test_synthesis_engine.py                 | 43 ++++++------
 .../acoustic_feature_extractor.py             | 30 +++-----
 voicevox_engine/tts_pipeline/tts_engine.py    | 68 ++++---------------
 4 files changed, 53 insertions(+), 108 deletions(-)

diff --git a/test/test_acoustic_feature_extractor.py b/test/test_acoustic_feature_extractor.py
index 9e2a4867c..3840a5c8d 100644
--- a/test/test_acoustic_feature_extractor.py
+++ b/test/test_acoustic_feature_extractor.py
@@ -1,27 +1,27 @@
 from unittest import TestCase
 
-from voicevox_engine.tts_pipeline.acoustic_feature_extractor import OjtPhoneme
+from voicevox_engine.tts_pipeline.acoustic_feature_extractor import Phoneme
 
 TRUE_NUM_PHONEME = 45
 
 
-class TestOjtPhoneme(TestCase):
+class TestPhoneme(TestCase):
     def setUp(self):
         super().setUp()
         # list_idx      0 1 2 3 4 5  6 7 8 9  10 1 2 3 4 5 6 7 8   9
         hello_hiho = "sil k o N n i ch i w a pau h i h o d e s U sil".split()
-        self.ojt_hello_hiho = [OjtPhoneme(s) for s in hello_hiho]
+        self.ojt_hello_hiho = [Phoneme(s) for s in hello_hiho]
 
     def test_const(self):
-        self.assertEqual(OjtPhoneme._NUM_PHONEME, TRUE_NUM_PHONEME)
-        self.assertEqual(OjtPhoneme._PHONEME_LIST[1], "A")
-        self.assertEqual(OjtPhoneme._PHONEME_LIST[14], "e")
-        self.assertEqual(OjtPhoneme._PHONEME_LIST[26], "m")
-        self.assertEqual(OjtPhoneme._PHONEME_LIST[38], "ts")
-        self.assertEqual(OjtPhoneme._PHONEME_LIST[41], "v")
+        self.assertEqual(Phoneme._NUM_PHONEME, TRUE_NUM_PHONEME)
+        self.assertEqual(Phoneme._PHONEME_LIST[1], "A")
+        self.assertEqual(Phoneme._PHONEME_LIST[14], "e")
+        self.assertEqual(Phoneme._PHONEME_LIST[26], "m")
+        self.assertEqual(Phoneme._PHONEME_LIST[38], "ts")
+        self.assertEqual(Phoneme._PHONEME_LIST[41], "v")
 
     def test_convert(self):
-        sil_phoneme = OjtPhoneme("sil")
+        sil_phoneme = Phoneme("sil")
         self.assertEqual(sil_phoneme.phoneme, "pau")
 
     def test_phoneme_id(self):
diff --git a/test/test_synthesis_engine.py b/test/test_synthesis_engine.py
index 98a941d9f..b67d87cad 100644
--- a/test/test_synthesis_engine.py
+++ b/test/test_synthesis_engine.py
@@ -7,7 +7,7 @@
 
 from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
 from voicevox_engine.tts_pipeline import TTSEngine
-from voicevox_engine.tts_pipeline.acoustic_feature_extractor import OjtPhoneme
+from voicevox_engine.tts_pipeline.acoustic_feature_extractor import Phoneme
 
 # TODO: import from voicevox_engine.synthesis_engine.mora
 from voicevox_engine.tts_pipeline.tts_engine import (
@@ -31,15 +31,15 @@
 TRUE_NUM_PHONEME = 45
 
 
-def is_same_phoneme(p1: OjtPhoneme, p2: OjtPhoneme) -> bool:
-    """2つのOjtPhonemeが同じ `.phoneme` を持つ"""
+def is_same_phoneme(p1: Phoneme, p2: Phoneme) -> bool:
+    """2つのPhonemeが同じ `.phoneme` を持つ"""
     return p1.phoneme == p2.phoneme
 
 
 def is_same_ojt_phoneme_list(
-    p1s: list[OjtPhoneme | None], p2s: list[OjtPhoneme | None]
+    p1s: list[Phoneme | None], p2s: list[Phoneme | None]
 ) -> bool:
-    """2つのOjtPhonemeリストで全要素ペアが同じ `.phoneme` を持つ"""
+    """2つのPhonemeリストで全要素ペアが同じ `.phoneme` を持つ"""
     if len(p1s) != len(p2s):
         return False
 
@@ -470,8 +470,7 @@ def setUp(self):
             "sil k o N n i ch i w a pau h i h o d e s U sil".split()
         )
         self.phoneme_data_list_hello_hiho = [
-            OjtPhoneme(p)
-            for p in "pau k o N n i ch i w a pau h i h o d e s U pau".split()
+            Phoneme(p) for p in "pau k o N n i ch i w a pau h i h o d e s U pau".split()
         ]
         self.accent_phrases_hello_hiho = [
             AccentPhrase(
@@ -520,7 +519,7 @@ def test_split_mora(self):
         self.assertEqual(vowel_indexes, [0, 2, 3, 5, 7, 9, 10, 12, 14, 16, 18, 19])
 
         ps = ["pau", "o", "N", "i", "i", "a", "pau", "i", "o", "e", "U", "pau"]
-        true_vowel_phoneme_list = [OjtPhoneme(p) for p in ps]
+        true_vowel_phoneme_list = [Phoneme(p) for p in ps]
         self.assertTrue(
             is_same_ojt_phoneme_list(vowel_phoneme_list, true_vowel_phoneme_list)
         )
@@ -529,16 +528,16 @@ def test_split_mora(self):
                 consonant_phoneme_list,
                 [
                     None,
-                    OjtPhoneme("k"),
+                    Phoneme("k"),
                     None,
-                    OjtPhoneme("n"),
-                    OjtPhoneme("ch"),
-                    OjtPhoneme("w"),
+                    Phoneme("n"),
+                    Phoneme("ch"),
+                    Phoneme("w"),
                     None,
-                    OjtPhoneme("h"),
-                    OjtPhoneme("h"),
-                    OjtPhoneme("d"),
-                    OjtPhoneme("s"),
+                    Phoneme("h"),
+                    Phoneme("h"),
+                    Phoneme("d"),
+                    Phoneme("s"),
                     None,
                 ],
             )
@@ -552,7 +551,7 @@ def test_pre_process(self):
         mora_index = 0
         phoneme_index = 1
 
-        self.assertTrue(is_same_phoneme(phoneme_data_list[0], OjtPhoneme("pau")))
+        self.assertTrue(is_same_phoneme(phoneme_data_list[0], Phoneme("pau")))
         for accent_phrase in self.accent_phrases_hello_hiho:
             moras = accent_phrase.moras
             for mora in moras:
@@ -561,13 +560,13 @@ def test_pre_process(self):
                 if mora.consonant is not None:
                     self.assertTrue(
                         is_same_phoneme(
-                            phoneme_data_list[phoneme_index], OjtPhoneme(mora.consonant)
+                            phoneme_data_list[phoneme_index], Phoneme(mora.consonant)
                         )
                     )
                     phoneme_index += 1
                 self.assertTrue(
                     is_same_phoneme(
-                        phoneme_data_list[phoneme_index], OjtPhoneme(mora.vowel)
+                        phoneme_data_list[phoneme_index], Phoneme(mora.vowel)
                     )
                 )
                 phoneme_index += 1
@@ -575,11 +574,11 @@ def test_pre_process(self):
                 self.assertEqual(flatten_moras[mora_index], accent_phrase.pause_mora)
                 mora_index += 1
                 self.assertTrue(
-                    is_same_phoneme(phoneme_data_list[phoneme_index], OjtPhoneme("pau"))
+                    is_same_phoneme(phoneme_data_list[phoneme_index], Phoneme("pau"))
                 )
                 phoneme_index += 1
         self.assertTrue(
-            is_same_phoneme(phoneme_data_list[phoneme_index], OjtPhoneme("pau"))
+            is_same_phoneme(phoneme_data_list[phoneme_index], Phoneme("pau"))
         )
 
     def test_replace_phoneme_length(self):
@@ -683,7 +682,7 @@ def test_replace_mora_pitch(self):
         def result_value(i: int):
             # unvoiced_mora_phoneme_listのPhoneme ID版
             unvoiced_mora_phoneme_id_list = [
-                OjtPhoneme(p).phoneme_id for p in unvoiced_mora_phoneme_list
+                Phoneme(p).phoneme_id for p in unvoiced_mora_phoneme_list
             ]
             if vowel_phoneme_list[i] in unvoiced_mora_phoneme_id_list:
                 return 0
diff --git a/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py b/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
index 066bfbdab..2b6cfeed3 100644
--- a/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
+++ b/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
@@ -12,10 +12,8 @@
 _NUM_PHONEME = len(_PHONEME_LIST)
 
 
-class OjtPhoneme:
-    """
-    OpenJTalkに含まれる音素
-    """
+class Phoneme:
+    """音素"""
 
     _PHONEME_LIST = _PHONEME_LIST
     _NUM_PHONEME = _NUM_PHONEME
@@ -32,25 +30,13 @@ def __eq__(self, o: object):
         raise NotImplementedError
 
     @property
-    def phoneme_id(self):
-        """
-        phoneme_id (phoneme list内でのindex)を取得する
-        Returns
-        -------
-        id : int
-            phoneme_idを返す
-        """
+    def phoneme_id(self) -> int:
+        """音素ID (音素リスト内でのindex) を取得する"""
         return self._PHONEME_LIST.index(self.phoneme)
 
     @property
     def onehot(self):
-        """
-        音素onehotベクトル
-        Returns
-        -------
-        onehot : numpy.ndarray
-            音素onehotベクトル（listの長さ分の0埋め配列のうち、phoneme id番目が1.0の配列）
-        """
-        array = numpy.zeros(self._NUM_PHONEME, dtype=numpy.float32)
-        array[self.phoneme_id] = 1.0
-        return array
+        """音素onehotベクトルを取得する"""
+        vec = numpy.zeros(self._NUM_PHONEME, dtype=numpy.float32)
+        vec[self.phoneme_id] = 1.0
+        return vec
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index 56f8414db..8f3edd3e9 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -8,7 +8,7 @@
 
 from ..core_wrapper import CoreWrapper, OldCoreError
 from ..model import AccentPhrase, AudioQuery, Mora
-from .acoustic_feature_extractor import OjtPhoneme
+from .acoustic_feature_extractor import Phoneme
 from .tts_engine_base import TTSEngineBase
 
 unvoiced_mora_phoneme_list = ["A", "I", "U", "E", "O", "cl", "pau"]
@@ -36,46 +36,18 @@ def to_flatten_moras(accent_phrases: list[AccentPhrase]) -> list[Mora]:
     return moras
 
 
-def to_flatten_phonemes(moras: list[Mora]) -> list[OjtPhoneme]:
-    """
-    モーラ系列に含まれる音素の抽出
-    Parameters
-    ----------
-    moras : list[Mora]
-        モーラ系列
-    Returns
-    -------
-    phonemes : list[OjtPhoneme]
-        音素系列
-    """
-    phonemes: list[OjtPhoneme] = []
+def to_flatten_phonemes(moras: list[Mora]) -> list[Phoneme]:
+    """モーラ系列から音素系列を抽出する"""
+    phonemes: list[Phoneme] = []
     for mora in moras:
         if mora.consonant:
-            phonemes += [OjtPhoneme(mora.consonant)]
-        phonemes += [(OjtPhoneme(mora.vowel))]
+            phonemes += [Phoneme(mora.consonant)]
+        phonemes += [(Phoneme(mora.vowel))]
     return phonemes
 
 
-def split_mora(phoneme_list: List[OjtPhoneme]):
-    """
-    OjtPhonemeのリストから、
-    母音の位置(vowel_indexes)
-    母音の音素列(vowel_phoneme_list)
-    子音の音素列(consonant_phoneme_list)
-    を生成し、返す
-    Parameters
-    ----------
-    phoneme_list : List[OjtPhoneme]
-        phonemeクラスのリスト
-    Returns
-    -------
-    consonant_phoneme_list : List[OjtPhoneme]
-        子音の音素列
-    vowel_phoneme_list : List[OjtPhoneme]
-        母音の音素列
-    vowel_indexes : : List[int]
-        母音の位置
-    """
+def split_mora(phoneme_list: List[Phoneme]):
+    """音素系列から子音系列・母音系列・母音位置を抽出する"""
     vowel_indexes = [
         i for i, p in enumerate(phoneme_list) if p.phoneme in mora_phoneme_list
     ]
@@ -85,7 +57,7 @@ def split_mora(phoneme_list: List[OjtPhoneme]):
     # 1の場合はconsonant(子音)が存在しない=母音のみ(a/i/u/e/o/N/cl/pau)で構成されるモーラ(音)である
     # 2の場合はconsonantが存在するモーラである
     # なので、2の場合(else)でphonemeを取り出している
-    consonant_phoneme_list: List[Optional[OjtPhoneme]] = [None] + [
+    consonant_phoneme_list: List[Optional[Phoneme]] = [None] + [
         None if post - prev == 1 else phoneme_list[post - 1]
         for prev, post in zip(vowel_indexes[:-1], vowel_indexes[1:])
     ]
@@ -94,25 +66,13 @@ def split_mora(phoneme_list: List[OjtPhoneme]):
 
 def pre_process(
     accent_phrases: list[AccentPhrase],
-) -> tuple[list[Mora], list[OjtPhoneme]]:
-    """
-    AccentPhraseモデルのリストを整形し、処理に必要なデータの原型を作り出す
-    Parameters
-    ----------
-    accent_phrases : List[AccentPhrase]
-        AccentPhraseモデルのリスト
-    Returns
-    -------
-    flatten_moras : List[Mora]
-        モーラ列（前後の無音含まない）
-    phonemes : List[OjtPhoneme]
-        音素列（前後の無音含む）
-    """
+) -> tuple[list[Mora], list[Phoneme]]:
+    """アクセント句系列から（前後の無音含まない）モーラ系列と（前後の無音含む）音素系列を抽出する"""
     flatten_moras = to_flatten_moras(accent_phrases)
     phonemes = to_flatten_phonemes(flatten_moras)
 
     # 前後無音の追加
-    phonemes = [OjtPhoneme("pau")] + phonemes + [OjtPhoneme("pau")]
+    phonemes = [Phoneme("pau")] + phonemes + [Phoneme("pau")]
 
     return flatten_moras, phonemes
 
@@ -387,7 +347,7 @@ def replace_phoneme_length(
 
         # 音素系列を抽出し前後無音を付加する
         phonemes = to_flatten_phonemes(moras)
-        phonemes = [OjtPhoneme("pau")] + phonemes + [OjtPhoneme("pau")]
+        phonemes = [Phoneme("pau")] + phonemes + [Phoneme("pau")]
 
         # 音素クラスから音素IDスカラへ表現を変換する
         phoneme_ids = numpy.array([p.phoneme_id for p in phonemes], dtype=numpy.int64)
@@ -429,7 +389,7 @@ def replace_mora_pitch(
             return []
 
         # phoneme
-        # AccentPhraseをすべてMoraおよびOjtPhonemeの形に分解し、処理可能な形にする
+        # AccentPhraseをすべてMoraおよびPhonemeの形に分解し、処理可能な形にする
         flatten_moras, phoneme_data_list = pre_process(accent_phrases)
 
         # accent

From eff29cf1eb94929ff558047bb9ed6f30259b6b3f Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sat, 23 Dec 2023 03:37:17 +0900
Subject: [PATCH 064/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E3=83=95?=
 =?UTF-8?q?=E3=83=AB=E3=82=B3=E3=83=B3=E3=83=86=E3=82=AD=E3=82=B9=E3=83=88?=
 =?UTF-8?q?=E3=83=A9=E3=83=99=E3=83=AB=E7=B3=BB=E3=82=AF=E3=83=A9=E3=82=B9?=
 =?UTF-8?q?=E3=81=AE=E3=83=AA=E3=83=8D=E3=83=BC=E3=83=A0=20(#919)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: フルコンテキストラベル系クラスのリネーム

* refactor: リネームに関わる関数名の変更

* refactor: import alias の廃止

* refactor: リネームに関わるドキュメントの改善

* fix: lint

* Apply suggestions from code review

* VvMora→Mora

---------

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 test/test_full_context_label.py               |  40 ++---
 .../tts_pipeline/full_context_label.py        | 168 ++++++------------
 2 files changed, 73 insertions(+), 135 deletions(-)

diff --git a/test/test_full_context_label.py b/test/test_full_context_label.py
index ad951d0fd..48019ddbe 100644
--- a/test/test_full_context_label.py
+++ b/test/test_full_context_label.py
@@ -1,11 +1,11 @@
 from unittest import TestCase
 
 from voicevox_engine.tts_pipeline.full_context_label import (
-    AccentPhrase,
-    BreathGroup,
+    AccentPhraseLabel,
+    BreathGroupLabel,
     Label,
-    Mora,
-    Utterance,
+    MoraLabel,
+    UtteranceLabel,
 )
 
 
@@ -28,7 +28,7 @@ def contexts_to_feature(contexts: dict[str, str]) -> str:
 
 
 # OpenJTalk コンテナクラス
-OjtContainer = Mora | AccentPhrase | BreathGroup | Utterance
+OjtContainer = MoraLabel | AccentPhraseLabel | BreathGroupLabel | UtteranceLabel
 
 
 def features(ojt_container: OjtContainer):
@@ -187,41 +187,41 @@ class TestMora(TestBasePhonemes):
     def setUp(self) -> None:
         super().setUp()
         # contexts["a2"] == "1" ko
-        self.mora_hello_1 = Mora(
+        self.mora_hello_1 = MoraLabel(
             consonant=self.phonemes_hello_hiho[1], vowel=self.phonemes_hello_hiho[2]
         )
         # contexts["a2"] == "2" N
-        self.mora_hello_2 = Mora(consonant=None, vowel=self.phonemes_hello_hiho[3])
+        self.mora_hello_2 = MoraLabel(consonant=None, vowel=self.phonemes_hello_hiho[3])
         # contexts["a2"] == "3" ni
-        self.mora_hello_3 = Mora(
+        self.mora_hello_3 = MoraLabel(
             consonant=self.phonemes_hello_hiho[4], vowel=self.phonemes_hello_hiho[5]
         )
         # contexts["a2"] == "4" chi
-        self.mora_hello_4 = Mora(
+        self.mora_hello_4 = MoraLabel(
             consonant=self.phonemes_hello_hiho[6], vowel=self.phonemes_hello_hiho[7]
         )
         # contexts["a2"] == "5" wa
-        self.mora_hello_5 = Mora(
+        self.mora_hello_5 = MoraLabel(
             consonant=self.phonemes_hello_hiho[8], vowel=self.phonemes_hello_hiho[9]
         )
         # contexts["a2"] == "1" hi
-        self.mora_hiho_1 = Mora(
+        self.mora_hiho_1 = MoraLabel(
             consonant=self.phonemes_hello_hiho[11], vowel=self.phonemes_hello_hiho[12]
         )
         # contexts["a2"] == "2" ho
-        self.mora_hiho_2 = Mora(
+        self.mora_hiho_2 = MoraLabel(
             consonant=self.phonemes_hello_hiho[13], vowel=self.phonemes_hello_hiho[14]
         )
         # contexts["a2"] == "3" de
-        self.mora_hiho_3 = Mora(
+        self.mora_hiho_3 = MoraLabel(
             consonant=self.phonemes_hello_hiho[15], vowel=self.phonemes_hello_hiho[16]
         )
         # contexts["a2"] == "1" sU
-        self.mora_hiho_4 = Mora(
+        self.mora_hiho_4 = MoraLabel(
             consonant=self.phonemes_hello_hiho[17], vowel=self.phonemes_hello_hiho[18]
         )
 
-    def assert_labels(self, mora: Mora, label_start: int, label_end: int) -> None:
+    def assert_labels(self, mora: MoraLabel, label_start: int, label_end: int) -> None:
         self.assertEqual(
             features(mora), self.test_case_hello_hiho[label_start:label_end]
         )
@@ -254,10 +254,10 @@ def setUp(self) -> None:
         super().setUp()
         # TODO: ValueErrorを吐く作為的ではない自然な例の模索
         # 存在しないなら放置でよい
-        self.accent_phrase_hello = AccentPhrase.from_labels(
+        self.accent_phrase_hello = AccentPhraseLabel.from_labels(
             self.phonemes_hello_hiho[1:10]
         )
-        self.accent_phrase_hiho = AccentPhrase.from_labels(
+        self.accent_phrase_hiho = AccentPhraseLabel.from_labels(
             self.phonemes_hello_hiho[11:19]
         )
 
@@ -283,10 +283,10 @@ def test_labels(self):
 class TestBreathGroup(TestBasePhonemes):
     def setUp(self) -> None:
         super().setUp()
-        self.breath_group_hello = BreathGroup.from_labels(
+        self.breath_group_hello = BreathGroupLabel.from_labels(
             self.phonemes_hello_hiho[1:10]
         )
-        self.breath_group_hiho = BreathGroup.from_labels(
+        self.breath_group_hiho = BreathGroupLabel.from_labels(
             self.phonemes_hello_hiho[11:19]
         )
 
@@ -308,7 +308,7 @@ def test_labels(self):
 class TestUtterance(TestBasePhonemes):
     def setUp(self) -> None:
         super().setUp()
-        self.utterance_hello_hiho = Utterance.from_labels(self.phonemes_hello_hiho)
+        self.utterance_hello_hiho = UtteranceLabel.from_labels(self.phonemes_hello_hiho)
 
     def test_phonemes(self):
         outputs_hello_hiho = space_jointed_phonemes(self.utterance_hello_hiho)
diff --git a/voicevox_engine/tts_pipeline/full_context_label.py b/voicevox_engine/tts_pipeline/full_context_label.py
index e9d14a5da..54149f3de 100644
--- a/voicevox_engine/tts_pipeline/full_context_label.py
+++ b/voicevox_engine/tts_pipeline/full_context_label.py
@@ -5,23 +5,15 @@
 
 import pyopenjtalk
 
-from ..model import AccentPhrase as VvAccentPhrase  # NOTE: 後にOjtコンテナクラスをリネーム予定
-from ..model import Mora as VvMora
+from ..model import AccentPhrase, Mora
 from .mora_list import openjtalk_mora2text
 
 
 @dataclass
 class Label:
-    """
-    OpenJTalk Label
-
-    Attributes
-    ----------
-    contexts: dict[str, str]
-        ラベルの属性
-    """
+    """OpenJTalkラベル"""
 
-    contexts: dict[str, str]
+    contexts: dict[str, str]  # ラベルの属性
 
     @classmethod
     def from_feature(cls, feature: str):
@@ -58,21 +50,11 @@ def __repr__(self):
 
 
 @dataclass
-class Mora:
-    """
-    モーラクラス
-    モーラは1音素(母音や促音「っ」、撥音「ん」など)か、2音素(母音と子音の組み合わせ)で成り立つ
-
-    Attributes
-    ----------
-    consonant : Label | None
-        子音
-    vowel : Label
-        母音
-    """
+class MoraLabel:
+    """モーララベル。モーラは1音素(母音や促音「っ」、撥音「ん」など)か、2音素(母音と子音の組み合わせ)で成り立つ。"""
 
-    consonant: Label | None
-    vowel: Label
+    consonant: Label | None  # 子音
+    vowel: Label  # 母音
 
     @property
     def labels(self) -> list[Label]:
@@ -84,29 +66,20 @@ def labels(self) -> list[Label]:
 
 
 @dataclass
-class AccentPhrase:
-    """
-    アクセント句クラス
-    同じアクセントのMoraを複数保持する
-    Attributes
-    ----------
-    moras : list[Mora]
-        音韻のリスト
-    accent : int
-        アクセント
-    """
+class AccentPhraseLabel:
+    """アクセント句ラベル"""
 
-    moras: list[Mora]
-    accent: int
-    is_interrogative: bool
+    moras: list[MoraLabel]  # モーラ系列
+    accent: int  # アクセント位置
+    is_interrogative: bool  # 疑問文か否か
 
     @classmethod
     def from_labels(cls, labels: list[Label]) -> Self:
-        """ラベル系列をcontextで区切りAccentPhraseインスタンスを生成する"""
+        """ラベル系列をcontextで区切りアクセント句ラベルを生成する"""
 
         # NOTE:「モーラごとのラベル系列」はラベル系列をcontextで区切り生成される。
 
-        moras: list[Mora] = []  # モーラ系列
+        moras: list[MoraLabel] = []  # モーラ系列
         mora_labels: list[Label] = []  # モーラごとのラベル系列を一時保存するコンテナ
 
         for label, next_label in zip(labels, labels[1:] + [None]):
@@ -129,7 +102,7 @@ def from_labels(cls, labels: list[Label]) -> Self:
                 else:
                     raise ValueError(mora_labels)
                 # 子音と母音からモーラを生成して保存する
-                mora = Mora(consonant=consonant, vowel=vowel)
+                mora = MoraLabel(consonant=consonant, vowel=vowel)
                 moras.append(mora)
                 # 次に向けてリセット
                 mora_labels = []
@@ -144,7 +117,7 @@ def from_labels(cls, labels: list[Label]) -> Self:
         # f3はアクセント句が疑問文かどうか（1で疑問文）
         is_interrogative = moras[-1].vowel.contexts["f3"] == "1"
 
-        # AccentPhrase インスタンスを生成する
+        # アクセント句ラベルを生成する
         accent_phrase = cls(
             moras=moras, accent=accent, is_interrogative=is_interrogative
         )
@@ -158,25 +131,18 @@ def labels(self) -> list[Label]:
 
 
 @dataclass
-class BreathGroup:
-    """
-    発声の区切りクラス
-    アクセントの異なるアクセント句を複数保持する
-    Attributes
-    ----------
-    accent_phrases : list[AccentPhrase]
-        アクセント句のリスト
-    """
+class BreathGroupLabel:
+    """発声区切りラベル"""
 
-    accent_phrases: list[AccentPhrase]
+    accent_phrases: list[AccentPhraseLabel]  # アクセント句のリスト
 
     @classmethod
     def from_labels(cls, labels: list[Label]) -> Self:
-        """ラベル系列をcontextで区切りBreathGroupインスタンスを生成する"""
+        """ラベル系列をcontextで区切りBreathGroupLabelインスタンスを生成する"""
 
         # NOTE:「アクセント句ごとのラベル系列」はラベル系列をcontextで区切り生成される。
 
-        accent_phrases: list[AccentPhrase] = []  # アクセント句系列
+        accent_phrases: list[AccentPhraseLabel] = []  # アクセント句系列
         accent_labels: list[Label] = []  # アクセント句ごとのラベル系列を一時保存するコンテナ
 
         for label, next_label in zip(labels, labels[1:] + [None]):
@@ -192,12 +158,12 @@ def from_labels(cls, labels: list[Label]) -> Self:
                 or label.contexts["f5"] != next_label.contexts["f5"]
             ):
                 # アクセント句を生成して保存する
-                accent_phrase = AccentPhrase.from_labels(accent_labels)
+                accent_phrase = AccentPhraseLabel.from_labels(accent_labels)
                 accent_phrases.append(accent_phrase)
                 # 次に向けてリセット
                 accent_labels = []
 
-        # BreathGroup インスタンスを生成する
+        # BreathGroupLabel インスタンスを生成する
         breath_group = cls(accent_phrases=accent_phrases)
 
         return breath_group
@@ -213,30 +179,21 @@ def labels(self) -> list[Label]:
 
 
 @dataclass
-class Utterance:
-    """
-    発声クラス
-    発声の区切りと無音を複数保持する
-    Attributes
-    ----------
-    breath_groups : list[BreathGroup]
-        発声の区切りのリスト
-    pauses : list[Label]
-        無音のリスト
-    """
+class UtteranceLabel:
+    """発声ラベル"""
 
-    breath_groups: list[BreathGroup]
-    pauses: list[Label]
+    breath_groups: list[BreathGroupLabel]  # 発声の区切りのリスト
+    pauses: list[Label]  # 無音のリスト
 
     @classmethod
     def from_labels(cls, labels: list[Label]) -> Self:
-        """ラベル系列をポーズで区切りUtteranceインスタンスを生成する"""
+        """ラベル系列をポーズで区切りUtteranceLabelインスタンスを生成する"""
 
-        # NOTE:「BreathGroupごとのラベル系列」はラベル系列をポーズで区切り生成される。
+        # NOTE:「BreathGroupLabelごとのラベル系列」はラベル系列をポーズで区切り生成される。
 
         pauses: list[Label] = []  # ポーズラベルのリスト
-        breath_groups: list[BreathGroup] = []  # BreathGroup のリスト
-        group_labels: list[Label] = []  # BreathGroupごとのラベル系列を一時保存するコンテナ
+        breath_groups: list[BreathGroupLabel] = []  # BreathGroupLabel のリスト
+        group_labels: list[Label] = []  # BreathGroupLabelごとのラベル系列を一時保存するコンテナ
 
         for label in labels:
             # ポーズが出現するまでラベル系列を一時保存する
@@ -248,13 +205,13 @@ def from_labels(cls, labels: list[Label]) -> Self:
                 # ポーズラベルを保存する
                 pauses.append(label)
                 if len(group_labels) > 0:
-                    # ラベル系列からBreathGroupを生成して保存する
-                    breath_group = BreathGroup.from_labels(group_labels)
+                    # ラベル系列からBreathGroupLabelを生成して保存する
+                    breath_group = BreathGroupLabel.from_labels(group_labels)
                     breath_groups.append(breath_group)
                     # 次に向けてリセット
                     group_labels = []
 
-        # Utteranceインスタンスを生成する
+        # UtteranceLabelインスタンスを生成する
         utterance = cls(breath_groups=breath_groups, pauses=pauses)
 
         return utterance
@@ -273,21 +230,11 @@ def labels(self) -> list[Label]:
         return labels
 
 
-def extract_full_context_label(text: str):
-    """
-    日本語テキストから発話クラスを抽出
-    Parameters
-    ----------
-    text : str
-        日本語テキスト
-    Returns
-    -------
-    utterance : Utterance
-        発話
-    """
-    features: list[str] = pyopenjtalk.extract_fullcontext(text)
+def _extract_utterance_label(text: str) -> UtteranceLabel:
+    """日本語文からUtteranceLabelを抽出する"""
+    features: list[str] = pyopenjtalk.extract_fullcontext(text)  # type: ignore
     labels = [Label.from_feature(feature) for feature in features]
-    utterance = Utterance.from_labels(labels)
+    utterance = UtteranceLabel.from_labels(labels)
     return utterance
 
 
@@ -311,20 +258,10 @@ def mora_to_text(mora: str) -> str:
         return mora
 
 
-def full_context_label_moras_to_moras(full_context_moras: list[Mora]) -> list[VvMora]:
-    """
-    Moraクラスのキャスト (`Mora` -> `model.Mora`)
-    Parameters
-    ----------
-    full_context_moras : List[Mora]
-        モーラ系列
-    Returns
-    -------
-    moras : List[Mora]
-        モーラ系列。音素長・モーラ音高は 0 初期化
-    """
+def _mora_labels_to_moras(mora_labels: list[MoraLabel]) -> list[Mora]:
+    """MoraLabel系列をMora系列へキャストする。音素長と音高は 0 初期化"""
     return [
-        VvMora(
+        Mora(
             text=mora_to_text("".join([p.phoneme for p in mora.labels])),
             consonant=(mora.consonant.phoneme if mora.consonant is not None else None),
             consonant_length=0 if mora.consonant is not None else None,
@@ -332,18 +269,18 @@ def full_context_label_moras_to_moras(full_context_moras: list[Mora]) -> list[Vv
             vowel_length=0,
             pitch=0,
         )
-        for mora in full_context_moras
+        for mora in mora_labels
     ]
 
 
-def utterance_to_accent_phrases(utterance: Utterance) -> list[VvAccentPhrase]:
-    """Utteranceインスタンスをアクセント句系列へドメイン変換する"""
+def _utterance_to_accent_phrases(utterance: UtteranceLabel) -> list[AccentPhrase]:
+    """UtteranceLabelインスタンスをアクセント句系列へドメイン変換する"""
     return [
-        VvAccentPhrase(
-            moras=full_context_label_moras_to_moras(accent_phrase.moras),
+        AccentPhrase(
+            moras=_mora_labels_to_moras(accent_phrase.moras),
             accent=accent_phrase.accent,
             pause_mora=(
-                VvMora(
+                Mora(
                     text="、",
                     consonant=None,
                     consonant_length=None,
@@ -364,14 +301,15 @@ def utterance_to_accent_phrases(utterance: Utterance) -> list[VvAccentPhrase]:
     ]
 
 
-def text_to_accent_phrases(text: str) -> list[VvAccentPhrase]:
-    """日本語テキストからアクセント句系列を生成"""
+def text_to_accent_phrases(text: str) -> list[AccentPhrase]:
+    """日本語文からアクセント句系列を生成する"""
     if len(text.strip()) == 0:
         return []
 
-    # 音素とアクセントの推定
-    utterance = extract_full_context_label(text)
+    # 日本語文からUtteranceLabelを抽出する
+    utterance = _extract_utterance_label(text)
     if len(utterance.breath_groups) == 0:
         return []
 
-    return utterance_to_accent_phrases(utterance)
+    # ドメインを変換する
+    return _utterance_to_accent_phrases(utterance)

From df61fa20512004c0368898cb413604644e8d6418 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sat, 23 Dec 2023 03:55:01 +0900
Subject: [PATCH 065/177] =?UTF-8?q?=E8=BF=BD=E5=8A=A0:=20=E3=82=B3?=
 =?UTF-8?q?=E3=82=A2=20Mock=20(#923)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add: コア Mock とその受け入れ

* fix: rename
---
 build_util/make_docs.py                       |   4 +-
 test/test_mock_synthesis_engine.py            |   3 +-
 voicevox_engine/dev/core/__init__.py          |  18 +-
 voicevox_engine/dev/core/mock.py              | 194 ++++++++----------
 voicevox_engine/dev/synthesis_engine/mock.py  |  33 ++-
 .../tts_pipeline/make_tts_engines.py          |   7 +-
 6 files changed, 105 insertions(+), 154 deletions(-)

diff --git a/build_util/make_docs.py b/build_util/make_docs.py
index ad7135e5d..c9f403654 100644
--- a/build_util/make_docs.py
+++ b/build_util/make_docs.py
@@ -1,7 +1,7 @@
 import json
 from pathlib import Path
 
-from voicevox_engine.dev.core import mock as core
+from voicevox_engine.dev.core import MockCoreWrapper
 from voicevox_engine.dev.synthesis_engine.mock import MockTTSEngine
 from voicevox_engine.preset import PresetManager
 from voicevox_engine.setting import USER_SETTING_PATH, SettingLoader
@@ -37,7 +37,7 @@ def generate_api_docs_html(schema: str) -> str:
 
     # FastAPI の機能を用いて OpenAPI schema を生成する
     app = run.generate_app(
-        synthesis_engines={"mock": MockTTSEngine(speakers=core.metas())},
+        synthesis_engines={"mock": MockTTSEngine(MockCoreWrapper())},
         latest_core_version="mock",
         setting_loader=SettingLoader(USER_SETTING_PATH),
         preset_manager=PresetManager(  # FIXME: impl MockPresetManager
diff --git a/test/test_mock_synthesis_engine.py b/test/test_mock_synthesis_engine.py
index e9cf71688..7b4bcffae 100644
--- a/test/test_mock_synthesis_engine.py
+++ b/test/test_mock_synthesis_engine.py
@@ -1,5 +1,6 @@
 from unittest import TestCase
 
+from voicevox_engine.dev.core import MockCoreWrapper
 from voicevox_engine.dev.synthesis_engine import MockTTSEngine
 from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
 from voicevox_engine.tts_pipeline.kana_parser import create_kana
@@ -102,7 +103,7 @@ def setUp(self):
                 pause_mora=None,
             ),
         ]
-        self.engine = MockTTSEngine(speakers="", supported_devices="")
+        self.engine = MockTTSEngine(MockCoreWrapper())
 
     def test_replace_phoneme_length(self):
         self.assertEqual(
diff --git a/voicevox_engine/dev/core/__init__.py b/voicevox_engine/dev/core/__init__.py
index 432b00b93..f04d393d3 100644
--- a/voicevox_engine/dev/core/__init__.py
+++ b/voicevox_engine/dev/core/__init__.py
@@ -1,17 +1,3 @@
-from .mock import (
-    decode_forward,
-    initialize,
-    metas,
-    supported_devices,
-    yukarin_s_forward,
-    yukarin_sa_forward,
-)
+from .mock import MockCoreWrapper
 
-__all__ = [
-    "decode_forward",
-    "initialize",
-    "yukarin_s_forward",
-    "yukarin_sa_forward",
-    "metas",
-    "supported_devices",
-]
+__all__ = ["MockCoreWrapper"]
diff --git a/voicevox_engine/dev/core/mock.py b/voicevox_engine/dev/core/mock.py
index c0531fbc8..5d5aa6cd6 100644
--- a/voicevox_engine/dev/core/mock.py
+++ b/voicevox_engine/dev/core/mock.py
@@ -1,118 +1,86 @@
 import json
-from logging import getLogger
-from typing import Any, Dict, List
-
-import numpy as np
-from pyopenjtalk import tts
-from soxr import resample
-
-DUMMY_TEXT = "これはダミーのテキストです"
-
-
-def initialize(path: str, use_gpu: bool, *args: List[Any]) -> None:
-    pass
-
-
-def yukarin_s_forward(length: int, **kwargs: Dict[str, Any]) -> np.ndarray:
-    logger = getLogger("uvicorn")  # FastAPI / Uvicorn 内からの利用のため
-    logger.info(
-        "Sorry, yukarin_s_forward() is a mock. Return values are incorrect.",
-    )
-    return np.ones(length) / 5
-
-
-def yukarin_sa_forward(length: int, **kwargs: Dict[str, Any]) -> np.ndarray:
-    logger = getLogger("uvicorn")  # FastAPI / Uvicorn 内からの利用のため
-    logger.info(
-        "Sorry, yukarin_sa_forward() is a mock. Return values are incorrect.",
-    )
-    return np.ones((1, length)) * 5
-
-
-def decode_forward(length: int, **kwargs: Dict[str, Any]) -> np.ndarray:
-    """
-    合成音声の波形データをNumPy配列で返します。ただし、常に固定の文言を読み上げます（DUMMY_TEXT）
-    参照→TTSEngine のdocstring [Mock]
-
-    Parameters
-    ----------
-    length : int
-        フレームの長さ
-
-    Returns
-    -------
-    wave : np.ndarray
-        音声合成した波形データ
-
-    Note
-    -------
-        ここで行う音声合成では、調声（ピッチ等）を反映しない
-        また、入力内容によらず常に固定の文言を読み上げる
-
-        # pyopenjtalk.tts()の出力仕様
-        dtype=np.float64, 16 bit, mono 48000 Hz
-
-        # resampleの説明
-        非モックdecode_forwardと合わせるために、出力を24kHzに変換した。
-    """
-    logger = getLogger("uvicorn")  # FastAPI / Uvicorn 内からの利用のため
-    logger.info(
-        "Sorry, decode_forward() is a mock. Return values are incorrect.",
-    )
-    wave, sr = tts(DUMMY_TEXT)
-    wave = resample(wave.astype("int16"), 48000, 24000)
-    return wave
+from pathlib import Path
+from unittest.mock import Mock
+
+from ...core_wrapper import CoreWrapper
+
+
+class MockCoreWrapper(CoreWrapper):
+    """`CoreWrapper` Mock"""
+
+    def __init__(
+        self,
+        use_gpu: bool = False,  # unused by this mock
+        core_dir: Path | None = None,  # unused by this mock
+        cpu_num_threads: int = 0,  # unused by this mock
+        load_all_models: bool = False,  # unused by this mock
+    ) -> None:  # NOTE: CoreWrapper.__init__ is not called here
+        self.default_sampling_rate = 24000
+
+        self.yukarin_s_forward = Mock()  # stub; no inference is run
+        self.yukarin_sa_forward = Mock()  # stub; no inference is run
+        self.decode_forward = Mock()  # stub; no inference is run
+
+    def metas(self) -> str:  # JSON text: four dummy speakers, style ids 0-9
+        return json.dumps(
+            [
+                {
+                    "name": "dummy1",
+                    "styles": [
+                        {"name": "style0", "id": 0},
+                        {"name": "style1", "id": 2},
+                        {"name": "style2", "id": 4},
+                        {"name": "style3", "id": 6},
+                    ],
+                    "speaker_uuid": "7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff",
+                    "version": "mock",
+                },
+                {
+                    "name": "dummy2",
+                    "styles": [
+                        {"name": "style0", "id": 1},
+                        {"name": "style1", "id": 3},
+                        {"name": "style2", "id": 5},
+                        {"name": "style3", "id": 7},
+                    ],
+                    "speaker_uuid": "388f246b-8c41-4ac1-8e2d-5d79f3ff56d9",
+                    "version": "mock",
+                },
+                {
+                    "name": "dummy3",
+                    "styles": [
+                        {"name": "style0", "id": 8},
+                    ],
+                    "speaker_uuid": "35b2c544-660e-401e-b503-0e14c635303a",
+                    "version": "mock",
+                },
+                {
+                    "name": "dummy4",
+                    "styles": [
+                        {"name": "style0", "id": 9},
+                    ],
+                    "speaker_uuid": "b1a81618-b27b-40d2-b0ea-27a9ad408c4b",
+                    "version": "mock",
+                },
+            ]
+        )
+
+    def supported_devices(self):  # NOTE(review): returns str; annotation missing
+        return json.dumps(
+            {
+                "cpu": True,
+                "cuda": False,
+            }
+        )
 
+    def finalize(self) -> None:
+        pass  # no-op in the mock
 
-def metas() -> str:
-    return json.dumps(
-        [
-            {
-                "name": "dummy1",
-                "styles": [
-                    {"name": "style0", "id": 0},
-                    {"name": "style1", "id": 2},
-                    {"name": "style2", "id": 4},
-                    {"name": "style3", "id": 6},
-                ],
-                "speaker_uuid": "7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff",
-                "version": "mock",
-            },
-            {
-                "name": "dummy2",
-                "styles": [
-                    {"name": "style0", "id": 1},
-                    {"name": "style1", "id": 3},
-                    {"name": "style2", "id": 5},
-                    {"name": "style3", "id": 7},
-                ],
-                "speaker_uuid": "388f246b-8c41-4ac1-8e2d-5d79f3ff56d9",
-                "version": "mock",
-            },
-            {
-                "name": "dummy3",
-                "styles": [
-                    {"name": "style0", "id": 8},
-                ],
-                "speaker_uuid": "35b2c544-660e-401e-b503-0e14c635303a",
-                "version": "mock",
-            },
-            {
-                "name": "dummy4",
-                "styles": [
-                    {"name": "style0", "id": 9},
-                ],
-                "speaker_uuid": "b1a81618-b27b-40d2-b0ea-27a9ad408c4b",
-                "version": "mock",
-            },
-        ]
-    )
+    def load_model(self, style_id: int) -> None:
+        pass  # no-op: the mock loads nothing
 
+    def is_model_loaded(self, style_id: int) -> bool:
+        return True  # every style_id is reported as loaded
 
-def supported_devices() -> str:
-    return json.dumps(
-        {
-            "cpu": True,
-            "cuda": False,
-        }
-    )
+    def assert_core_success(self, result: bool) -> None:
+        pass  # no-op: `result` is never checked
diff --git a/voicevox_engine/dev/synthesis_engine/mock.py b/voicevox_engine/dev/synthesis_engine/mock.py
index b861dc7c9..da6b2eb87 100644
--- a/voicevox_engine/dev/synthesis_engine/mock.py
+++ b/voicevox_engine/dev/synthesis_engine/mock.py
@@ -1,13 +1,14 @@
 from logging import getLogger
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List
 
 import numpy as np
 from pyopenjtalk import tts
 from soxr import resample
 
+from ...core_wrapper import CoreWrapper
 from ...model import AccentPhrase, AudioQuery
 from ...tts_pipeline import TTSEngineBase
-from ...tts_pipeline.tts_engine import to_flatten_moras
+from ...tts_pipeline.tts_engine import CoreAdapter, to_flatten_moras
 
 
 class MockTTSEngine(TTSEngineBase):
@@ -15,30 +16,28 @@ class MockTTSEngine(TTSEngineBase):
     TTSEngine [Mock]
     """
 
-    def __init__(
-        self,
-        speakers: str,
-        supported_devices: Optional[str] = None,
-    ):
-        """
-        __init__ [Mock]
-        """
+    def __init__(self, core: CoreWrapper):
         super().__init__()
-
-        self._speakers = speakers
-        self._supported_devices = supported_devices
+        self.core = CoreAdapter(core)
+        # NOTE: self.coreは将来的に消す予定
 
     @property
     def default_sampling_rate(self) -> int:
-        return 24000
+        return self.core.default_sampling_rate
 
     @property
     def speakers(self) -> str:
-        return self._speakers
+        return self.core.speakers
 
     @property
-    def supported_devices(self) -> Optional[str]:
-        return self._supported_devices
+    def supported_devices(self) -> str | None:
+        return self.core.supported_devices
+
+    def initialize_style_id_synthesis(self, style_id: int, skip_reinit: bool):
+        return self.core.initialize_style_id_synthesis(style_id, skip_reinit)
+
+    def is_initialized_style_id_synthesis(self, style_id: int) -> bool:
+        return self.core.is_initialized_style_id_synthesis(style_id)
 
     def replace_phoneme_length(
         self, accent_phrases: List[AccentPhrase], style_id: int
diff --git a/voicevox_engine/tts_pipeline/make_tts_engines.py b/voicevox_engine/tts_pipeline/make_tts_engines.py
index c1477b73c..6a3af9352 100644
--- a/voicevox_engine/tts_pipeline/make_tts_engines.py
+++ b/voicevox_engine/tts_pipeline/make_tts_engines.py
@@ -127,14 +127,11 @@ def load_core_library(core_dir: Path, suppress_error: bool = False):
 
     else:
         # モック追加
-        from ..dev.core import metas as mock_metas
-        from ..dev.core import supported_devices as mock_supported_devices
+        from ..dev.core import MockCoreWrapper
         from ..dev.synthesis_engine import MockTTSEngine
 
         if "0.0.0" not in synthesis_engines:
             print("Info: Loading mock.")
-            synthesis_engines["0.0.0"] = MockTTSEngine(
-                speakers=mock_metas(), supported_devices=mock_supported_devices()
-            )
+            synthesis_engines["0.0.0"] = MockTTSEngine(MockCoreWrapper())
 
     return synthesis_engines

From d328d92271b50df677a1999aa7d0652dc1b4f299 Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Sat, 23 Dec 2023 15:32:58 +0900
Subject: [PATCH 066/177] =?UTF-8?q?`/speakers`=20API=E3=82=92=E5=8F=A9?=
 =?UTF-8?q?=E3=81=8Fe2e=E3=83=86=E3=82=B9=E3=83=88=E3=82=92=E8=BF=BD?=
 =?UTF-8?q?=E5=8A=A0=20(#925)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* speakersAPIを叩くe2eテストを追加

* Add syrupy package to requirements-test.txt

* ^
---
 README.md                                     |  6 ++
 poetry.lock                                   | 42 ++++------
 pyproject.toml                                |  3 +-
 requirements-test.txt                         |  6 +-
 .../test_fetch_speakers_success.json          | 82 +++++++++++++++++++
 test/e2e/conftest.py                          | 15 ++++
 test/e2e/test_validate_speakers.py            | 10 +++
 7 files changed, 132 insertions(+), 32 deletions(-)
 create mode 100644 test/e2e/__snapshots__/test_validate_speakers/test_fetch_speakers_success.json
 create mode 100644 test/e2e/test_validate_speakers.py

diff --git a/README.md b/README.md
index 589da6944..26c10a38c 100644
--- a/README.md
+++ b/README.md
@@ -526,6 +526,12 @@ pysen run format lint
 python -m pytest
 ```
 
+#### スナップショットの更新
+
+```bash
+python -m pytest --snapshot-update
+```
+
 ### タイポチェック
 
 [typos](https://github.com/crate-ci/typos) を使ってタイポのチェックを行っています。
diff --git a/poetry.lock b/poetry.lock
index 77bc5f94a..a62873ac8 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1525,17 +1525,6 @@ files = [
     {file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"},
 ]
 
-[[package]]
-name = "py"
-version = "1.11.0"
-description = "library with cross-python path, ini-parsing, io, code, log facilities"
-optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
-files = [
-    {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"},
-    {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
-]
-
 [[package]]
 name = "pycodestyle"
 version = "2.11.0"
@@ -1762,27 +1751,23 @@ lint = ["black (>=19.10b0,<=22.10)", "flake8 (>=3.7,<5)", "flake8-bugbear", "iso
 
 [[package]]
 name = "pytest"
-version = "6.2.5"
+version = "7.4.3"
 description = "pytest: simple powerful testing with Python"
 optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.7"
 files = [
-    {file = "pytest-6.2.5-py3-none-any.whl", hash = "sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134"},
-    {file = "pytest-6.2.5.tar.gz", hash = "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89"},
+    {file = "pytest-7.4.3-py3-none-any.whl", hash = "sha256:0d009c083ea859a71b76adf7c1d502e4bc170b80a8ef002da5806527b9591fac"},
+    {file = "pytest-7.4.3.tar.gz", hash = "sha256:d989d136982de4e3b29dabcc838ad581c64e8ed52c11fbe86ddebd9da0818cd5"},
 ]
 
 [package.dependencies]
-atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""}
-attrs = ">=19.2.0"
 colorama = {version = "*", markers = "sys_platform == \"win32\""}
 iniconfig = "*"
 packaging = "*"
 pluggy = ">=0.12,<2.0"
-py = ">=1.8.2"
-toml = "*"
 
 [package.extras]
-testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"]
+testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
 
 [[package]]
 name = "python-multipart"
@@ -2200,16 +2185,19 @@ anyio = ">=3.4.0,<5"
 full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyaml"]
 
 [[package]]
-name = "toml"
-version = "0.10.2"
-description = "Python Library for Tom's Obvious, Minimal Language"
+name = "syrupy"
+version = "4.6.0"
+description = "Pytest Snapshot Test Utility"
 optional = false
-python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
+python-versions = ">=3.8.1,<4"
 files = [
-    {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"},
-    {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"},
+    {file = "syrupy-4.6.0-py3-none-any.whl", hash = "sha256:747aae1bcf3cb3249e33b1e6d81097874d23615982d5686ebe637875b0775a1b"},
+    {file = "syrupy-4.6.0.tar.gz", hash = "sha256:231b1f5d00f1f85048ba81676c79448076189c4aef4d33f21ae32f3b4c565a54"},
 ]
 
+[package.dependencies]
+pytest = ">=7.0.0,<8.0.0"
+
 [[package]]
 name = "tomlkit"
 version = "0.12.1"
@@ -2443,4 +2431,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = "~3.11"
-content-hash = "eb3e0209e98c6df8760ef8dae1ccbd175af6a28e09ea5efc5e84b566b6c5b8d0"
+content-hash = "4711e0905b713acdd1e99867b48670e76e6840da7c7b853d3a5248de2d1f68b2"
diff --git a/pyproject.toml b/pyproject.toml
index c69cf96af..dbf9c1d30 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -71,10 +71,11 @@ flake8-bugbear = "^23.1.0"
 flake8 = "^6.0.0"
 isort = "^5.12.0"
 mypy = "^1.6.0"
-pytest = "^6.2.5"
+pytest = "^7.4.3"
 coveralls = "^3.2.0"
 poetry = "^1.3.1"
 httpx = "^0.25.0"          # NOTE: required by fastapi.testclient.TestClient
+syrupy = "^4.6.0"
 
 [tool.poetry.group.license.dependencies]
 pip-licenses = "^4.2.0"
diff --git a/requirements-test.txt b/requirements-test.txt
index f2b18f0e2..ebc32d97c 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,7 +1,6 @@
 aiofiles==0.7.0 ; python_version >= "3.11" and python_version < "3.12"
 anyio==3.7.1 ; python_version >= "3.11" and python_version < "3.12"
 asgiref==3.7.2 ; python_version >= "3.11" and python_version < "3.12"
-atomicwrites==1.4.1 ; python_version >= "3.11" and python_version < "3.12" and sys_platform == "win32"
 attrs==23.1.0 ; python_version >= "3.11" and python_version < "3.12"
 black==22.12.0 ; python_version >= "3.11" and python_version < "3.12"
 build==0.10.0 ; python_version >= "3.11" and python_version < "3.12"
@@ -58,7 +57,6 @@ poetry-core==1.7.0 ; python_version >= "3.11" and python_version < "3.12"
 poetry-plugin-export==1.5.0 ; python_version >= "3.11" and python_version < "3.12"
 poetry==1.6.1 ; python_version >= "3.11" and python_version < "3.12"
 ptyprocess==0.7.0 ; python_version >= "3.11" and python_version < "3.12"
-py==1.11.0 ; python_version >= "3.11" and python_version < "3.12"
 pycodestyle==2.11.0 ; python_version >= "3.11" and python_version < "3.12"
 pycparser==2.21 ; python_version >= "3.11" and python_version < "3.12"
 pydantic==1.10.12 ; python_version >= "3.11" and python_version < "3.12"
@@ -67,7 +65,7 @@ pyopenjtalk @ git+https://github.com/VOICEVOX/pyopenjtalk@b35fc89fe42948a28e33ae
 pyproject-hooks==1.0.0 ; python_version >= "3.11" and python_version < "3.12"
 pyrsistent==0.19.3 ; python_version >= "3.11" and python_version < "3.12"
 pysen==0.10.5 ; python_version >= "3.11" and python_version < "3.12"
-pytest==6.2.5 ; python_version >= "3.11" and python_version < "3.12"
+pytest==7.4.3 ; python_version >= "3.11" and python_version < "3.12"
 python-multipart==0.0.5 ; python_version >= "3.11" and python_version < "3.12"
 pywin32-ctypes==0.2.2 ; python_version >= "3.11" and python_version < "3.12" and sys_platform == "win32"
 pyworld==0.3.4 ; python_version >= "3.11" and python_version < "3.12"
@@ -84,7 +82,7 @@ sniffio==1.3.0 ; python_version >= "3.11" and python_version < "3.12"
 soundfile==0.12.1 ; python_version >= "3.11" and python_version < "3.12"
 soxr==0.3.6 ; python_version >= "3.11" and python_version < "3.12"
 starlette==0.27.0 ; python_version >= "3.11" and python_version < "3.12"
-toml==0.10.2 ; python_version >= "3.11" and python_version < "3.12"
+syrupy==4.6.0 ; python_version >= "3.11" and python_version < "3.12"
 tomlkit==0.12.1 ; python_version >= "3.11" and python_version < "3.12"
 tqdm==4.66.1 ; python_version >= "3.11" and python_version < "3.12"
 trove-classifiers==2023.8.7 ; python_version >= "3.11" and python_version < "3.12"
diff --git a/test/e2e/__snapshots__/test_validate_speakers/test_fetch_speakers_success.json b/test/e2e/__snapshots__/test_validate_speakers/test_fetch_speakers_success.json
new file mode 100644
index 000000000..f948bf0f1
--- /dev/null
+++ b/test/e2e/__snapshots__/test_validate_speakers/test_fetch_speakers_success.json
@@ -0,0 +1,82 @@
+[
+  {
+    "name": "dummy1",
+    "speaker_uuid": "7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff",
+    "styles": [
+      {
+        "id": 0,
+        "name": "style0"
+      },
+      {
+        "id": 2,
+        "name": "style1"
+      },
+      {
+        "id": 4,
+        "name": "style2"
+      },
+      {
+        "id": 6,
+        "name": "style3"
+      }
+    ],
+    "supported_features": {
+      "permitted_synthesis_morphing": "ALL"
+    },
+    "version": "mock"
+  },
+  {
+    "name": "dummy2",
+    "speaker_uuid": "388f246b-8c41-4ac1-8e2d-5d79f3ff56d9",
+    "styles": [
+      {
+        "id": 1,
+        "name": "style0"
+      },
+      {
+        "id": 3,
+        "name": "style1"
+      },
+      {
+        "id": 5,
+        "name": "style2"
+      },
+      {
+        "id": 7,
+        "name": "style3"
+      }
+    ],
+    "supported_features": {
+      "permitted_synthesis_morphing": "SELF_ONLY"
+    },
+    "version": "mock"
+  },
+  {
+    "name": "dummy3",
+    "speaker_uuid": "35b2c544-660e-401e-b503-0e14c635303a",
+    "styles": [
+      {
+        "id": 8,
+        "name": "style0"
+      }
+    ],
+    "supported_features": {
+      "permitted_synthesis_morphing": "NOTHING"
+    },
+    "version": "mock"
+  },
+  {
+    "name": "dummy4",
+    "speaker_uuid": "b1a81618-b27b-40d2-b0ea-27a9ad408c4b",
+    "styles": [
+      {
+        "id": 9,
+        "name": "style0"
+      }
+    ],
+    "supported_features": {
+      "permitted_synthesis_morphing": "ALL"
+    },
+    "version": "mock"
+  }
+]
diff --git a/test/e2e/conftest.py b/test/e2e/conftest.py
index 9475d3b05..32da2a156 100644
--- a/test/e2e/conftest.py
+++ b/test/e2e/conftest.py
@@ -3,6 +3,8 @@
 import pytest
 from fastapi.testclient import TestClient
 from run import generate_app
+from syrupy.assertion import SnapshotAssertion
+from syrupy.extensions.json import JSONSnapshotExtension
 
 from voicevox_engine.preset import PresetManager
 from voicevox_engine.setting import SettingLoader
@@ -10,6 +12,19 @@
 from voicevox_engine.utility.core_version_utility import get_latest_core_version
 
 
+@pytest.fixture
+def snapshot_json(snapshot: SnapshotAssertion):
+    """
+    Fixture providing a syrupy snapshot that uses the JSON snapshot extension.
+
+    Examples
+    --------
+    >>> def test_foo(snapshot_json: SnapshotAssertion):
+    ...     assert snapshot_json == {"key": "value"}
+    """
+    return snapshot.use_extension(JSONSnapshotExtension)
+
+
 @pytest.fixture(scope="session")
 def client():
     synthesis_engines = make_synthesis_engines(use_gpu=False)
diff --git a/test/e2e/test_validate_speakers.py b/test/e2e/test_validate_speakers.py
new file mode 100644
index 000000000..c212e5fa0
--- /dev/null
+++ b/test/e2e/test_validate_speakers.py
@@ -0,0 +1,10 @@
+from fastapi.testclient import TestClient
+from syrupy.extensions.json import JSONSnapshotExtension
+
+
+def test_fetch_speakers_success(
+    client: TestClient, snapshot_json: JSONSnapshotExtension
+):
+    response = client.get("/speakers")
+    assert response.status_code == 200  # the request itself must succeed
+    assert snapshot_json == response.json()  # body must match stored snapshot

From 71b5be59a2972782f8d937da2e46acdbd6897f92 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Mon, 25 Dec 2023 13:56:12 +0900
Subject: [PATCH 067/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E7=96=91?=
 =?UTF-8?q?=E5=95=8F=E6=96=87=20upspeak=20=E3=83=86=E3=82=B9=E3=83=88=20(#?=
 =?UTF-8?q?921)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: upspeakテストの関数test化

* refactor: 関数ネストの廃止

* refactor: upspeakテストの分割

* fix: キーワード引数削除のリバート
---
 test/test_synthesis_engine_base.py | 46 +++++++++++++++---------------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/test/test_synthesis_engine_base.py b/test/test_synthesis_engine_base.py
index bc6d88f2c..d28048809 100644
--- a/test/test_synthesis_engine_base.py
+++ b/test/test_synthesis_engine_base.py
@@ -6,6 +6,7 @@
 
 from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
 from voicevox_engine.tts_pipeline import TTSEngine
+from voicevox_engine.tts_pipeline.tts_engine_base import apply_interrogative_upspeak
 
 
 def yukarin_s_mock(length: int, phoneme_list: numpy.ndarray, style_id: numpy.ndarray):
@@ -192,14 +193,6 @@ def setUp(self):
         )
         self.synthesis_engine._synthesis_impl = Mock()
 
-    def create_accent_phrases_test_base(self, text: str, expected: List[AccentPhrase]):
-        actual = self.synthesis_engine.create_accent_phrases(text, 1)
-        self.assertEqual(
-            expected,
-            actual,
-            "case(text:" + text + ")",
-        )
-
     def create_synthesis_test_base(
         self,
         text: str,
@@ -209,29 +202,22 @@ def create_synthesis_test_base(
         """音声合成時に疑問文モーラ処理を行っているかどうかを検証
         (https://github.com/VOICEVOX/voicevox_engine/issues/272#issuecomment-1022610866)
         """
-        accent_phrases = self.synthesis_engine.create_accent_phrases(text, 1)
-        query = create_mock_query(accent_phrases=accent_phrases)
-        self.synthesis_engine.synthesis(
-            query, 0, enable_interrogative_upspeak=enable_interrogative_upspeak
-        )
-        # _synthesis_implの第一引数に与えられたqueryを検証
-        actual = self.synthesis_engine._synthesis_impl.call_args[0][0].accent_phrases
-
-        self.assertEqual(
-            expected,
-            actual,
-            "case(text:" + text + ")",
-        )
+        inputs = self.synthesis_engine.create_accent_phrases(text, 1)
+        outputs = apply_interrogative_upspeak(inputs, enable_interrogative_upspeak)
+        self.assertEqual(expected, outputs, f"case(text:{text})")
 
     def test_create_accent_phrases(self):
         """accent_phrasesの作成時では疑問文モーラ処理を行わない
         (https://github.com/VOICEVOX/voicevox_engine/issues/272#issuecomment-1022610866)
         """
+        text = "これはありますか？"
         expected = koreha_arimasuka_base_expected()
         expected[-1].is_interrogative = True
-        self.create_accent_phrases_test_base(text="これはありますか？", expected=expected)
+        actual = self.synthesis_engine.create_accent_phrases(text, 1)
+        self.assertEqual(expected, actual, f"case(text:{text})")
 
-    def test_synthesis_interrogative(self):
+    def test_upspeak_voiced_last_mora(self):
+        # voiced + "？" + flagON -> upspeak
         expected = koreha_arimasuka_base_expected()
         expected[-1].is_interrogative = True
         expected[-1].moras += [
@@ -250,6 +236,7 @@ def test_synthesis_interrogative(self):
             enable_interrogative_upspeak=True,
         )
 
+        # voiced + "？" + flagOFF -> non-upspeak
         expected = koreha_arimasuka_base_expected()
         expected[-1].is_interrogative = True
         self.create_synthesis_test_base(
@@ -258,6 +245,7 @@ def test_synthesis_interrogative(self):
             enable_interrogative_upspeak=False,
         )
 
+        # voiced + "" + flagON -> non-upspeak
         expected = koreha_arimasuka_base_expected()
         self.create_synthesis_test_base(
             text="これはありますか",
@@ -265,6 +253,7 @@ def test_synthesis_interrogative(self):
             enable_interrogative_upspeak=True,
         )
 
+    def test_upspeak_voiced_N_last_mora(self):
         def nn_base_expected():
             return [
                 AccentPhrase(
@@ -284,6 +273,7 @@ def nn_base_expected():
                 )
             ]
 
+        # voiced + "" + flagON -> non-upspeak
         expected = nn_base_expected()
         self.create_synthesis_test_base(
             text="ん",
@@ -291,6 +281,7 @@ def nn_base_expected():
             enable_interrogative_upspeak=True,
         )
 
+        # voiced + "？" + flagON -> upspeak
         expected = nn_base_expected()
         expected[-1].is_interrogative = True
         expected[-1].moras += [
@@ -309,6 +300,7 @@ def nn_base_expected():
             enable_interrogative_upspeak=True,
         )
 
+        # voiced + "？" + flagOFF -> non-upspeak
         expected = nn_base_expected()
         expected[-1].is_interrogative = True
         self.create_synthesis_test_base(
@@ -317,6 +309,7 @@ def nn_base_expected():
             enable_interrogative_upspeak=False,
         )
 
+    def test_upspeak_unvoiced_last_mora(self):
         def ltu_base_expected():
             return [
                 AccentPhrase(
@@ -336,6 +329,7 @@ def ltu_base_expected():
                 )
             ]
 
+        # unvoiced + "" + flagON -> non-upspeak
         expected = ltu_base_expected()
         self.create_synthesis_test_base(
             text="っ",
@@ -343,6 +337,7 @@ def ltu_base_expected():
             enable_interrogative_upspeak=True,
         )
 
+        # unvoiced + "？" + flagON -> non-upspeak
         expected = ltu_base_expected()
         expected[-1].is_interrogative = True
         self.create_synthesis_test_base(
@@ -351,6 +346,7 @@ def ltu_base_expected():
             enable_interrogative_upspeak=True,
         )
 
+        # unvoiced + "？" + flagOFF -> non-upspeak
         expected = ltu_base_expected()
         expected[-1].is_interrogative = True
         self.create_synthesis_test_base(
@@ -359,6 +355,7 @@ def ltu_base_expected():
             enable_interrogative_upspeak=False,
         )
 
+    def test_upspeak_voiced_u_last_mora(self):
         def su_base_expected():
             return [
                 AccentPhrase(
@@ -378,6 +375,7 @@ def su_base_expected():
                 )
             ]
 
+        # voiced + "" + flagON -> non-upspeak
         expected = su_base_expected()
         self.create_synthesis_test_base(
             text="す",
@@ -385,6 +383,7 @@ def su_base_expected():
             enable_interrogative_upspeak=True,
         )
 
+        # voiced + "？" + flagON -> upspeak
         expected = su_base_expected()
         expected[-1].is_interrogative = True
         expected[-1].moras += [
@@ -403,6 +402,7 @@ def su_base_expected():
             enable_interrogative_upspeak=True,
         )
 
+        # voiced + "？" + flagOFF -> non-upspeak
         expected = su_base_expected()
         expected[-1].is_interrogative = True
         self.create_synthesis_test_base(

From 3ac441097862f85010d522f06e4229b19b4be7d5 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Mon, 25 Dec 2023 14:50:21 +0900
Subject: [PATCH 068/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E3=83=95?=
 =?UTF-8?q?=E3=83=AB=E3=82=B3=E3=83=B3=E3=83=86=E3=82=AD=E3=82=B9=E3=83=88?=
 =?UTF-8?q?=E3=83=A9=E3=83=99=E3=83=AB=E3=81=AE=E8=A6=8B=E9=80=9A=E3=81=97?=
 =?UTF-8?q?=E6=94=B9=E5=96=84=20(#926)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: 関数ネストの削除と引数整理

* fix: 入力変更のリバート

* fix: 変数名変更のリバート

* refactor: test のリネーム
---
 test/test_full_context_label.py               | 119 +++++++++---------
 .../tts_pipeline/full_context_label.py        |  33 ++---
 2 files changed, 69 insertions(+), 83 deletions(-)

diff --git a/test/test_full_context_label.py b/test/test_full_context_label.py
index 48019ddbe..76506ff46 100644
--- a/test/test_full_context_label.py
+++ b/test/test_full_context_label.py
@@ -36,7 +36,7 @@ def features(ojt_container: OjtContainer):
     return [contexts_to_feature(p.contexts) for p in ojt_container.labels]
 
 
-class TestBasePhonemes(TestCase):
+class TestBaseLabels(TestCase):
     def setUp(self):
         super().setUp()
         # pyopenjtalk.extract_fullcontext("こんにちは、ヒホです。")の結果
@@ -124,7 +124,7 @@ def setUp(self):
             + "/F:xx_xx#xx_xx@xx_xx|xx_xx/G:xx_xx%xx_xx_xx/H:1_4/I:xx-xx"
             + "@xx+xx&xx-xx|xx+xx/J:xx_xx/K:2+2-9",
         ]
-        self.phonemes_hello_hiho = [
+        self.labels_hello_hiho = [
             Label.from_feature(feature) for feature in self.test_case_hello_hiho
         ]
 
@@ -139,16 +139,18 @@ def space_jointed_phonemes(ojt_container: OjtContainer) -> str:
     return " ".join([label.phoneme for label in ojt_container.labels])
 
 
-class TestPhoneme(TestBasePhonemes):
+class TestLabel(TestBaseLabels):
     def test_phoneme(self):
+        """Label に含まれる音素をテスト"""
         self.assertEqual(
-            " ".join([phoneme.phoneme for phoneme in self.phonemes_hello_hiho]),
+            " ".join([label.phoneme for label in self.labels_hello_hiho]),
             "sil k o N n i ch i w a pau h i h o d e s U sil",
         )
 
     def test_is_pause(self):
+        """Label のポーズ判定をテスト"""
         self.assertEqual(
-            [phoneme.is_pause() for phoneme in self.phonemes_hello_hiho],
+            [label.is_pause() for label in self.labels_hello_hiho],
             [
                 True,  # sil
                 False,  # k
@@ -173,60 +175,54 @@ def test_is_pause(self):
             ],
         )
 
-    def test_label(self) -> None:
+    def test_feature(self) -> None:
+        """Label に含まれる features をテスト"""
         self.assertEqual(
-            [
-                contexts_to_feature(phoneme.contexts)
-                for phoneme in self.phonemes_hello_hiho
-            ],
+            [contexts_to_feature(label.contexts) for label in self.labels_hello_hiho],
             self.test_case_hello_hiho,
         )
 
 
-class TestMora(TestBasePhonemes):
+class TestMoraLabel(TestBaseLabels):
     def setUp(self) -> None:
         super().setUp()
         # contexts["a2"] == "1" ko
         self.mora_hello_1 = MoraLabel(
-            consonant=self.phonemes_hello_hiho[1], vowel=self.phonemes_hello_hiho[2]
+            consonant=self.labels_hello_hiho[1], vowel=self.labels_hello_hiho[2]
         )
         # contexts["a2"] == "2" N
-        self.mora_hello_2 = MoraLabel(consonant=None, vowel=self.phonemes_hello_hiho[3])
+        self.mora_hello_2 = MoraLabel(consonant=None, vowel=self.labels_hello_hiho[3])
         # contexts["a2"] == "3" ni
         self.mora_hello_3 = MoraLabel(
-            consonant=self.phonemes_hello_hiho[4], vowel=self.phonemes_hello_hiho[5]
+            consonant=self.labels_hello_hiho[4], vowel=self.labels_hello_hiho[5]
         )
         # contexts["a2"] == "4" chi
         self.mora_hello_4 = MoraLabel(
-            consonant=self.phonemes_hello_hiho[6], vowel=self.phonemes_hello_hiho[7]
+            consonant=self.labels_hello_hiho[6], vowel=self.labels_hello_hiho[7]
         )
         # contexts["a2"] == "5" wa
         self.mora_hello_5 = MoraLabel(
-            consonant=self.phonemes_hello_hiho[8], vowel=self.phonemes_hello_hiho[9]
+            consonant=self.labels_hello_hiho[8], vowel=self.labels_hello_hiho[9]
         )
         # contexts["a2"] == "1" hi
         self.mora_hiho_1 = MoraLabel(
-            consonant=self.phonemes_hello_hiho[11], vowel=self.phonemes_hello_hiho[12]
+            consonant=self.labels_hello_hiho[11], vowel=self.labels_hello_hiho[12]
         )
         # contexts["a2"] == "2" ho
         self.mora_hiho_2 = MoraLabel(
-            consonant=self.phonemes_hello_hiho[13], vowel=self.phonemes_hello_hiho[14]
+            consonant=self.labels_hello_hiho[13], vowel=self.labels_hello_hiho[14]
         )
         # contexts["a2"] == "3" de
         self.mora_hiho_3 = MoraLabel(
-            consonant=self.phonemes_hello_hiho[15], vowel=self.phonemes_hello_hiho[16]
+            consonant=self.labels_hello_hiho[15], vowel=self.labels_hello_hiho[16]
         )
         # contexts["a2"] == "1" sU
         self.mora_hiho_4 = MoraLabel(
-            consonant=self.phonemes_hello_hiho[17], vowel=self.phonemes_hello_hiho[18]
-        )
-
-    def assert_labels(self, mora: MoraLabel, label_start: int, label_end: int) -> None:
-        self.assertEqual(
-            features(mora), self.test_case_hello_hiho[label_start:label_end]
+            consonant=self.labels_hello_hiho[17], vowel=self.labels_hello_hiho[18]
         )
 
     def test_phonemes(self) -> None:
+        """MoraLabel に含まれる音素系列をテスト"""
         self.assertEqual(jointed_phonemes(self.mora_hello_1), "ko")
         self.assertEqual(jointed_phonemes(self.mora_hello_2), "N")
         self.assertEqual(jointed_phonemes(self.mora_hello_3), "ni")
@@ -237,83 +233,86 @@ def test_phonemes(self) -> None:
         self.assertEqual(jointed_phonemes(self.mora_hiho_3), "de")
         self.assertEqual(jointed_phonemes(self.mora_hiho_4), "sU")
 
-    def test_labels(self) -> None:
-        self.assert_labels(self.mora_hello_1, 1, 3)
-        self.assert_labels(self.mora_hello_2, 3, 4)
-        self.assert_labels(self.mora_hello_3, 4, 6)
-        self.assert_labels(self.mora_hello_4, 6, 8)
-        self.assert_labels(self.mora_hello_5, 8, 10)
-        self.assert_labels(self.mora_hiho_1, 11, 13)
-        self.assert_labels(self.mora_hiho_2, 13, 15)
-        self.assert_labels(self.mora_hiho_3, 15, 17)
-        self.assert_labels(self.mora_hiho_4, 17, 19)
-
-
-class TestAccentPhrase(TestBasePhonemes):
+    def test_features(self) -> None:
+        """MoraLabel に含まれる features をテスト"""
+        expects = self.test_case_hello_hiho
+        self.assertEqual(features(self.mora_hello_1), expects[1:3])
+        self.assertEqual(features(self.mora_hello_2), expects[3:4])
+        self.assertEqual(features(self.mora_hello_3), expects[4:6])
+        self.assertEqual(features(self.mora_hello_4), expects[6:8])
+        self.assertEqual(features(self.mora_hello_5), expects[8:10])
+        self.assertEqual(features(self.mora_hiho_1), expects[11:13])
+        self.assertEqual(features(self.mora_hiho_2), expects[13:15])
+        self.assertEqual(features(self.mora_hiho_3), expects[15:17])
+        self.assertEqual(features(self.mora_hiho_4), expects[17:19])
+
+
+class TestAccentPhraseLabel(TestBaseLabels):
     def setUp(self) -> None:
         super().setUp()
         # TODO: ValueErrorを吐く作為的ではない自然な例の模索
         # 存在しないなら放置でよい
         self.accent_phrase_hello = AccentPhraseLabel.from_labels(
-            self.phonemes_hello_hiho[1:10]
+            self.labels_hello_hiho[1:10]
         )
         self.accent_phrase_hiho = AccentPhraseLabel.from_labels(
-            self.phonemes_hello_hiho[11:19]
+            self.labels_hello_hiho[11:19]
         )
 
     def test_accent(self):
+        """AccentPhraseLabel に含まれるアクセント位置をテスト"""
         self.assertEqual(self.accent_phrase_hello.accent, 5)
         self.assertEqual(self.accent_phrase_hiho.accent, 1)
 
     def test_phonemes(self):
+        """AccentPhraseLabel に含まれる音素系列をテスト"""
         outputs_hello = space_jointed_phonemes(self.accent_phrase_hello)
         outputs_hiho = space_jointed_phonemes(self.accent_phrase_hiho)
         self.assertEqual(outputs_hello, "k o N n i ch i w a")
         self.assertEqual(outputs_hiho, "h i h o d e s U")
 
-    def test_labels(self):
-        self.assertEqual(
-            features(self.accent_phrase_hello), self.test_case_hello_hiho[1:10]
-        )
-        self.assertEqual(
-            features(self.accent_phrase_hiho), self.test_case_hello_hiho[11:19]
-        )
+    def test_features(self):
+        """AccentPhraseLabel に含まれる features をテスト"""
+        expects = self.test_case_hello_hiho
+        self.assertEqual(features(self.accent_phrase_hello), expects[1:10])
+        self.assertEqual(features(self.accent_phrase_hiho), expects[11:19])
 
 
-class TestBreathGroup(TestBasePhonemes):
+class TestBreathGroupLabel(TestBaseLabels):
     def setUp(self) -> None:
         super().setUp()
         self.breath_group_hello = BreathGroupLabel.from_labels(
-            self.phonemes_hello_hiho[1:10]
+            self.labels_hello_hiho[1:10]
         )
         self.breath_group_hiho = BreathGroupLabel.from_labels(
-            self.phonemes_hello_hiho[11:19]
+            self.labels_hello_hiho[11:19]
         )
 
     def test_phonemes(self):
+        """BreathGroupLabel に含まれる音素系列をテスト"""
         outputs_hello = space_jointed_phonemes(self.breath_group_hello)
         outputs_hiho = space_jointed_phonemes(self.breath_group_hiho)
         self.assertEqual(outputs_hello, "k o N n i ch i w a")
         self.assertEqual(outputs_hiho, "h i h o d e s U")
 
-    def test_labels(self):
-        self.assertEqual(
-            features(self.breath_group_hello), self.test_case_hello_hiho[1:10]
-        )
-        self.assertEqual(
-            features(self.breath_group_hiho), self.test_case_hello_hiho[11:19]
-        )
+    def test_features(self):
+        """BreathGroupLabel に含まれる features をテスト"""
+        expects = self.test_case_hello_hiho
+        self.assertEqual(features(self.breath_group_hello), expects[1:10])
+        self.assertEqual(features(self.breath_group_hiho), expects[11:19])
 
 
-class TestUtterance(TestBasePhonemes):
+class TestUtteranceLabel(TestBaseLabels):
     def setUp(self) -> None:
         super().setUp()
-        self.utterance_hello_hiho = UtteranceLabel.from_labels(self.phonemes_hello_hiho)
+        self.utterance_hello_hiho = UtteranceLabel.from_labels(self.labels_hello_hiho)
 
     def test_phonemes(self):
+        """UtteranceLabel に含まれる音素系列をテスト"""
         outputs_hello_hiho = space_jointed_phonemes(self.utterance_hello_hiho)
         expects_hello_hiho = "sil k o N n i ch i w a pau h i h o d e s U sil"
         self.assertEqual(outputs_hello_hiho, expects_hello_hiho)
 
-    def test_labels(self):
+    def test_features(self):
+        """UtteranceLabel に含まれる features をテスト"""
         self.assertEqual(features(self.utterance_hello_hiho), self.test_case_hello_hiho)
diff --git a/voicevox_engine/tts_pipeline/full_context_label.py b/voicevox_engine/tts_pipeline/full_context_label.py
index 54149f3de..c183d31fc 100644
--- a/voicevox_engine/tts_pipeline/full_context_label.py
+++ b/voicevox_engine/tts_pipeline/full_context_label.py
@@ -230,25 +230,8 @@ def labels(self) -> list[Label]:
         return labels
 
 
-def _extract_utterance_label(text: str) -> UtteranceLabel:
-    """日本語文からUtteranceLabelを抽出する"""
-    features: list[str] = pyopenjtalk.extract_fullcontext(text)  # type: ignore
-    labels = [Label.from_feature(feature) for feature in features]
-    utterance = UtteranceLabel.from_labels(labels)
-    return utterance
-
-
 def mora_to_text(mora: str) -> str:
-    """
-    Parameters
-    ----------
-    mora : str
-        モーラ音素文字列
-    Returns
-    -------
-    mora : str
-        モーラ音素文字列
-    """
+    """モーラ相当の音素文字系列を日本語カタカナ文へ変換する（例: 'hO' -> 'ホ'）"""
     if mora[-1:] in ["A", "I", "U", "E", "O"]:
         # 無声化母音を小文字に
         mora = mora[:-1] + mora[-1].lower()
@@ -262,7 +245,7 @@ def _mora_labels_to_moras(mora_labels: list[MoraLabel]) -> list[Mora]:
     """MoraLabel系列をMora系列へキャストする。音素長と音高は 0 初期化"""
     return [
         Mora(
-            text=mora_to_text("".join([p.phoneme for p in mora.labels])),
+            text=mora_to_text("".join([label.phoneme for label in mora.labels])),
             consonant=(mora.consonant.phoneme if mora.consonant is not None else None),
             consonant_length=0 if mora.consonant is not None else None,
             vowel=mora.vowel.phoneme,
@@ -275,6 +258,9 @@ def _mora_labels_to_moras(mora_labels: list[MoraLabel]) -> list[Mora]:
 
 def _utterance_to_accent_phrases(utterance: UtteranceLabel) -> list[AccentPhrase]:
     """UtteranceLabelインスタンスをアクセント句系列へドメイン変換する"""
+    if len(utterance.breath_groups) == 0:
+        return []
+
     return [
         AccentPhrase(
             moras=_mora_labels_to_moras(accent_phrase.moras),
@@ -307,9 +293,10 @@ def text_to_accent_phrases(text: str) -> list[AccentPhrase]:
         return []
 
     # 日本語文からUtteranceLabelを抽出する
-    utterance = _extract_utterance_label(text)
-    if len(utterance.breath_groups) == 0:
-        return []
+    features: list[str] = pyopenjtalk.extract_fullcontext(text)  # type: ignore
+    utterance = UtteranceLabel.from_labels(list(map(Label.from_feature, features)))
 
     # ドメインを変換する
-    return _utterance_to_accent_phrases(utterance)
+    accent_phrases = _utterance_to_accent_phrases(utterance)
+
+    return accent_phrases

From 11556681805f8f7cd78df9cb46b854da18b6c177 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Tue, 26 Dec 2023 00:59:53 +0900
Subject: [PATCH 069/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E3=82=B3?=
 =?UTF-8?q?=E3=82=A2=E6=A9=9F=E8=83=BD=E3=82=A2=E3=82=AF=E3=82=BB=E3=82=B9?=
 =?UTF-8?q?=E3=81=AE=E5=88=86=E9=9B=A2=20(#927)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: `TTSEngineBase.default_sampling_rate` の廃止

* refactor: `TTSEngineBase.supported_devices` の廃止

* fix: lint

* fix: lint

* refactor: `TTSEngineBase.speakers` の廃止

* refactor: `TTSEngineBase.initialize_style_id_synthesis` の廃止

* refactor: `TTSEngineBase.is_initialized_style_id_synthesis` の廃止

* fix: lint
---
 run.py                                        | 44 +++++++++------
 voicevox_engine/dev/synthesis_engine/mock.py  | 18 -------
 voicevox_engine/metas/MetasStore.py           | 10 ++--
 voicevox_engine/morphing.py                   |  5 +-
 voicevox_engine/tts_pipeline/__init__.py      |  3 +-
 voicevox_engine/tts_pipeline/tts_engine.py    | 35 +++++-------
 .../tts_pipeline/tts_engine_base.py           | 54 +------------------
 7 files changed, 51 insertions(+), 118 deletions(-)

diff --git a/run.py b/run.py
index 0e7a33cba..22eeceb35 100644
--- a/run.py
+++ b/run.py
@@ -65,7 +65,11 @@
     Setting,
     SettingLoader,
 )
-from voicevox_engine.tts_pipeline import TTSEngineBase, make_synthesis_engines
+from voicevox_engine.tts_pipeline import (
+    CoreAdapter,
+    TTSEngineBase,
+    make_synthesis_engines,
+)
 from voicevox_engine.tts_pipeline.kana_parser import create_kana, parse_kana
 from voicevox_engine.user_dict import (
     apply_word,
@@ -143,8 +147,6 @@ def generate_app(
     if root_dir is None:
         root_dir = engine_root()
 
-    default_sampling_rate = synthesis_engines[latest_core_version].default_sampling_rate
-
     app = FastAPI(
         title="VOICEVOX Engine",
         description="VOICEVOXの音声合成エンジンです。",
@@ -234,6 +236,14 @@ def get_engine(core_version: Optional[str]) -> TTSEngineBase:
             return synthesis_engines[core_version]
         raise HTTPException(status_code=422, detail="不明なバージョンです")
 
+    def get_core(core_version: Optional[str]) -> CoreAdapter:
+        """指定したバージョンのコアを取得する"""
+        if core_version is None:
+            return synthesis_engines[latest_core_version].core
+        if core_version in synthesis_engines:
+            return synthesis_engines[core_version].core
+        raise HTTPException(status_code=422, detail="不明なバージョンです")
+
     @app.post(
         "/audio_query",
         response_model=AudioQuery,
@@ -251,6 +261,7 @@ def audio_query(
         """
         style_id = get_style_id_from_deprecated(style_id=style_id, speaker_id=speaker)
         engine = get_engine(core_version)
+        core = get_core(core_version)
         accent_phrases = engine.create_accent_phrases(text, style_id=style_id)
         return AudioQuery(
             accent_phrases=accent_phrases,
@@ -260,7 +271,7 @@ def audio_query(
             volumeScale=1,
             prePhonemeLength=0.1,
             postPhonemeLength=0.1,
-            outputSamplingRate=default_sampling_rate,
+            outputSamplingRate=core.default_sampling_rate,
             outputStereo=False,
             kana=create_kana(accent_phrases),
         )
@@ -280,6 +291,7 @@ def audio_query_from_preset(
         音声合成用のクエリの初期値を得ます。ここで得られたクエリはそのまま音声合成に利用できます。各値の意味は`Schemas`を参照してください。
         """
         engine = get_engine(core_version)
+        core = get_core(core_version)
         try:
             presets = preset_manager.load_presets()
         except PresetError as err:
@@ -302,7 +314,7 @@ def audio_query_from_preset(
             volumeScale=selected_preset.volumeScale,
             prePhonemeLength=selected_preset.prePhonemeLength,
             postPhonemeLength=selected_preset.postPhonemeLength,
-            outputSamplingRate=default_sampling_rate,
+            outputSamplingRate=core.default_sampling_rate,
             outputStereo=False,
             kana=create_kana(accent_phrases),
         )
@@ -554,10 +566,10 @@ def morphable_targets(
         プロパティが存在しない場合は、モーフィングが許可されているとみなします。
         返り値の話者はstring型なので注意。
         """
-        engine = get_engine(core_version)
+        core = get_core(core_version)
 
         try:
-            speakers = metas_store.load_combined_metas(engine=engine)
+            speakers = metas_store.load_combined_metas(core=core)
             morphable_targets = get_morphable_targets(
                 speakers=speakers, base_speakers=base_speakers
             )
@@ -596,9 +608,10 @@ def _synthesis_morphing(
         モーフィングの割合は`morph_rate`で指定でき、0.0でベースの話者、1.0でターゲットの話者に近づきます。
         """
         engine = get_engine(core_version)
+        core = get_core(core_version)
 
         try:
-            speakers = metas_store.load_combined_metas(engine=engine)
+            speakers = metas_store.load_combined_metas(core=core)
             speaker_lookup = construct_lookup(speakers=speakers)
             is_permitted = is_synthesis_morphing_permitted(
                 speaker_lookup, base_speaker, target_speaker
@@ -616,6 +629,7 @@ def _synthesis_morphing(
         # 生成したパラメータはキャッシュされる
         morph_param = synthesis_morphing_parameter(
             engine=engine,
+            core=core,
             query=query,
             base_speaker=base_speaker,
             target_speaker=target_speaker,
@@ -770,8 +784,7 @@ def core_versions() -> Response:
     def speakers(
         core_version: str | None = None,
     ) -> list[Speaker]:
-        engine = get_engine(core_version)
-        return metas_store.load_combined_metas(engine=engine)
+        return metas_store.load_combined_metas(get_core(core_version))
 
     @app.get("/speaker_info", response_model=SpeakerInfo, tags=["その他"])
     def speaker_info(
@@ -811,7 +824,7 @@ def speaker_info(
         #           ...
 
         # 該当話者の検索
-        speakers = json.loads(get_engine(core_version).speakers)
+        speakers = json.loads(get_core(core_version).speakers)
         for i in range(len(speakers)):
             if speakers[i]["speaker_uuid"] == speaker_uuid:
                 speaker = speakers[i]
@@ -960,8 +973,8 @@ def initialize_style_id(
         指定されたstyle_idのスタイルを初期化します。
         実行しなくても他のAPIは使用できますが、初回実行時に時間がかかることがあります。
         """
-        engine = get_engine(core_version)
-        engine.initialize_style_id_synthesis(style_id=style_id, skip_reinit=skip_reinit)
+        core = get_core(core_version)
+        core.initialize_style_id_synthesis(style_id=style_id, skip_reinit=skip_reinit)
         return Response(status_code=204)
 
     @app.get("/is_initialized_style_id", response_model=bool, tags=["その他"])
@@ -972,8 +985,7 @@ def is_initialized_style_id(
         """
         指定されたstyle_idのスタイルが初期化されているかどうかを返します。
         """
-        engine = get_engine(core_version)
-        return engine.is_initialized_style_id_synthesis(style_id)
+        return get_core(core_version).is_initialized_style_id_synthesis(style_id)
 
     @app.post("/initialize_speaker", status_code=204, tags=["その他"], deprecated=True)
     def initialize_speaker(
@@ -1163,7 +1175,7 @@ def import_user_dict_words(
     def supported_devices(
         core_version: str | None = None,
     ) -> Response:
-        supported_devices = get_engine(core_version).supported_devices
+        supported_devices = get_core(core_version).supported_devices
         if supported_devices is None:
             raise HTTPException(status_code=422, detail="非対応の機能です。")
         return Response(
diff --git a/voicevox_engine/dev/synthesis_engine/mock.py b/voicevox_engine/dev/synthesis_engine/mock.py
index da6b2eb87..c64184e56 100644
--- a/voicevox_engine/dev/synthesis_engine/mock.py
+++ b/voicevox_engine/dev/synthesis_engine/mock.py
@@ -21,24 +21,6 @@ def __init__(self, core: CoreWrapper):
         self.core = CoreAdapter(core)
         # NOTE: self.coreは将来的に消す予定
 
-    @property
-    def default_sampling_rate(self) -> int:
-        return self.core.default_sampling_rate
-
-    @property
-    def speakers(self) -> str:
-        return self.core.speakers
-
-    @property
-    def supported_devices(self) -> str | None:
-        return self.core.supported_devices
-
-    def initialize_style_id_synthesis(self, style_id: int, skip_reinit: bool):
-        return self.core.initialize_style_id_synthesis(style_id, skip_reinit)
-
-    def is_initialized_style_id_synthesis(self, style_id: int) -> bool:
-        return self.core.is_initialized_style_id_synthesis(style_id)
-
     def replace_phoneme_length(
         self, accent_phrases: List[AccentPhrase], style_id: int
     ) -> List[AccentPhrase]:
diff --git a/voicevox_engine/metas/MetasStore.py b/voicevox_engine/metas/MetasStore.py
index 3eb1e4eaa..d24232d50 100644
--- a/voicevox_engine/metas/MetasStore.py
+++ b/voicevox_engine/metas/MetasStore.py
@@ -5,7 +5,7 @@
 from voicevox_engine.metas.Metas import CoreSpeaker, EngineSpeaker, Speaker, StyleInfo
 
 if TYPE_CHECKING:
-    from voicevox_engine.tts_pipeline.tts_engine_base import TTSEngineBase
+    from voicevox_engine.tts_pipeline.tts_engine import CoreAdapter
 
 
 class MetasStore:
@@ -30,20 +30,20 @@ def __init__(self, engine_speakers_path: Path) -> None:
 
     # FIXME: engineではなくList[CoreSpeaker]を渡す形にすることで
     # TTSEngineBaseによる循環importを修正する
-    def load_combined_metas(self, engine: "TTSEngineBase") -> List[Speaker]:
+    def load_combined_metas(self, core: "CoreAdapter") -> List[Speaker]:
         """
         コアに含まれる話者メタ情報とエンジンに含まれる話者メタ情報を統合
         Parameters
         ----------
-        engine : TTSEngineBase
-            コアに含まれる話者メタ情報をもったエンジン
+        core : CoreAdapter
+            話者メタ情報をもったコア
         Returns
         -------
         ret : List[Speaker]
             エンジンとコアに含まれる話者メタ情報
         """
         # コアに含まれる話者メタ情報の収集
-        core_metas = [CoreSpeaker(**speaker) for speaker in json.loads(engine.speakers)]
+        core_metas = [CoreSpeaker(**speaker) for speaker in json.loads(core.speakers)]
         # エンジンに含まれる話者メタ情報との統合
         return [
             Speaker(
diff --git a/voicevox_engine/morphing.py b/voicevox_engine/morphing.py
index ee7bf446d..6b7a6d0dd 100644
--- a/voicevox_engine/morphing.py
+++ b/voicevox_engine/morphing.py
@@ -10,7 +10,7 @@
 from .metas.Metas import Speaker, SpeakerSupportPermittedSynthesisMorphing, StyleInfo
 from .metas.MetasStore import construct_lookup
 from .model import AudioQuery, MorphableTargetInfo, StyleIdNotFoundError
-from .tts_pipeline import TTSEngine
+from .tts_pipeline import CoreAdapter, TTSEngine
 
 
 # FIXME: ndarray type hint, https://github.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder/blob/2b64f86197573497c685c785c6e0e743f407b63e/pyworld/pyworld.pyx#L398  # noqa
@@ -129,6 +129,7 @@ def is_synthesis_morphing_permitted(
 
 def synthesis_morphing_parameter(
     engine: TTSEngine,
+    core: CoreAdapter,
     query: AudioQuery,
     base_speaker: int,
     target_speaker: int,
@@ -136,7 +137,7 @@ def synthesis_morphing_parameter(
     query = deepcopy(query)
 
     # 不具合回避のためデフォルトのサンプリングレートでWORLDに掛けた後に指定のサンプリングレートに変換する
-    query.outputSamplingRate = engine.default_sampling_rate
+    query.outputSamplingRate = core.default_sampling_rate
 
     # WORLDに掛けるため合成はモノラルで行う
     query.outputStereo = False
diff --git a/voicevox_engine/tts_pipeline/__init__.py b/voicevox_engine/tts_pipeline/__init__.py
index 8aeea1b06..5ccc2f800 100644
--- a/voicevox_engine/tts_pipeline/__init__.py
+++ b/voicevox_engine/tts_pipeline/__init__.py
@@ -1,10 +1,11 @@
 from ..core_wrapper import CoreWrapper, load_runtime_lib
 from .make_tts_engines import make_synthesis_engines
-from .tts_engine import TTSEngine
+from .tts_engine import CoreAdapter, TTSEngine
 from .tts_engine_base import TTSEngineBase
 
 __all__ = [
     "CoreWrapper",
+    "CoreAdapter",
     "load_runtime_lib",
     "make_synthesis_engines",
     "TTSEngine",
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index 8f3edd3e9..838f40a6a 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -227,13 +227,11 @@ def default_sampling_rate(self) -> int:
     @property
     def speakers(self) -> str:
         """話者情報（json文字列）"""
-        # Coreプロキシ
         return self.core.metas()
 
     @property
     def supported_devices(self) -> str | None:
-        """デバイスサポート情報"""
-        # Coreプロキシ
+        """デバイスサポート情報（None: 情報無し）"""
         try:
             supported_devices = self.core.supported_devices()
         except OldCoreError:
@@ -241,7 +239,16 @@ def supported_devices(self) -> str | None:
         return supported_devices
 
     def initialize_style_id_synthesis(self, style_id: int, skip_reinit: bool):
-        # Core管理
+        """
+        指定したスタイルでの音声合成を初期化する。
+        何度も実行可能。コアが古く未対応の場合は何もしない。
+        Parameters
+        ----------
+        style_id : int
+            スタイルID
+        skip_reinit : bool
+            True の場合, 既に初期化済みのスタイルの再初期化をスキップします
+        """
         try:
             with self.mutex:
                 # 以下の条件のいずれかを満たす場合, 初期化を実行する
@@ -253,7 +260,7 @@ def initialize_style_id_synthesis(self, style_id: int, skip_reinit: bool):
             pass  # コアが古い場合はどうしようもないので何もしない
 
     def is_initialized_style_id_synthesis(self, style_id: int) -> bool:
-        # Coreプロキシ
+        """指定したスタイルでの音声合成が初期化されているかどうかを返す"""
         try:
             return self.core.is_model_loaded(style_id)
         except OldCoreError:
@@ -320,24 +327,6 @@ def __init__(self, core: CoreWrapper):
         self.core = CoreAdapter(core)
         # NOTE: self.coreは将来的に消す予定
 
-    @property
-    def default_sampling_rate(self) -> int:
-        return self.core.default_sampling_rate
-
-    @property
-    def speakers(self) -> str:
-        return self.core.speakers
-
-    @property
-    def supported_devices(self) -> str | None:
-        return self.core.supported_devices
-
-    def initialize_style_id_synthesis(self, style_id: int, skip_reinit: bool):
-        return self.core.initialize_style_id_synthesis(style_id, skip_reinit)
-
-    def is_initialized_style_id_synthesis(self, style_id: int) -> bool:
-        return self.core.is_initialized_style_id_synthesis(style_id)
-
     def replace_phoneme_length(
         self, accent_phrases: list[AccentPhrase], style_id: int
     ) -> list[AccentPhrase]:
diff --git a/voicevox_engine/tts_pipeline/tts_engine_base.py b/voicevox_engine/tts_pipeline/tts_engine_base.py
index 4357be6d0..5e427d0d3 100644
--- a/voicevox_engine/tts_pipeline/tts_engine_base.py
+++ b/voicevox_engine/tts_pipeline/tts_engine_base.py
@@ -1,6 +1,6 @@
 import copy
 from abc import ABCMeta, abstractmethod
-from typing import List, Optional
+from typing import List
 
 import numpy as np
 
@@ -42,58 +42,6 @@ def apply_interrogative_upspeak(
 
 
 class TTSEngineBase(metaclass=ABCMeta):
-    @property
-    @abstractmethod
-    def default_sampling_rate(self) -> int:
-        raise NotImplementedError
-
-    @property
-    @abstractmethod
-    def speakers(self) -> str:
-        """話者情報（json文字列）"""
-        # FIXME: jsonではなくModelを返すようにする
-        raise NotImplementedError
-
-    @property
-    @abstractmethod
-    def supported_devices(self) -> Optional[str]:
-        """
-        デバイス対応情報
-        Returns
-        -------
-            対応デバイス一覧（None: 情報取得不可）
-        """
-        raise NotImplementedError
-
-    def initialize_style_id_synthesis(  # noqa: B027
-        self, style_id: int, skip_reinit: bool
-    ):
-        """
-        指定したスタイルでの音声合成を初期化する。
-        何度も実行可能。未実装の場合は何もしない。
-        Parameters
-        ----------
-        style_id : int
-            スタイルID
-        skip_reinit : bool
-            True の場合, 既に初期化済みの話者の再初期化をスキップします
-        """
-        pass
-
-    def is_initialized_style_id_synthesis(self, style_id: int) -> bool:
-        """
-        指定したスタイルでの音声合成が初期化されているかどうかを返す
-        Parameters
-        ----------
-        style_id : int
-            スタイルID
-        Returns
-        -------
-        bool
-            初期化されているかどうか
-        """
-        return True
-
     @abstractmethod
     def replace_phoneme_length(
         self, accent_phrases: List[AccentPhrase], style_id: int

From f8ce0fa6bfb6bc671b07ed510bc4af593eb370df Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Tue, 26 Dec 2023 01:24:36 +0900
Subject: [PATCH 070/177] =?UTF-8?q?=E8=BF=BD=E5=8A=A0:=20`MockCoreWrapper`?=
 =?UTF-8?q?=20=E3=81=AB=E3=81=8A=E3=81=91=E3=82=8B=E7=94=9F=E6=88=90=20(#9?=
 =?UTF-8?q?28)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add: MockCoreWrapperにおける生成

* fix: `Mock` 削除

* fix: lint
---
 voicevox_engine/core_wrapper.py  |  5 +---
 voicevox_engine/dev/core/mock.py | 47 ++++++++++++++++++++++++++++----
 2 files changed, 43 insertions(+), 9 deletions(-)

diff --git a/voicevox_engine/core_wrapper.py b/voicevox_engine/core_wrapper.py
index f7f697ffe..07d37ea4a 100644
--- a/voicevox_engine/core_wrapper.py
+++ b/voicevox_engine/core_wrapper.py
@@ -522,10 +522,7 @@ def metas(self) -> str:
         return self.core.metas().decode("utf-8")
 
     def yukarin_s_forward(
-        self,
-        length: int,
-        phoneme_list: np.ndarray,
-        style_id: np.ndarray,
+        self, length: int, phoneme_list: np.ndarray, style_id: np.ndarray
     ) -> np.ndarray:
         """
         音素列から、音素ごとの長さを求める関数
diff --git a/voicevox_engine/dev/core/mock.py b/voicevox_engine/dev/core/mock.py
index 5d5aa6cd6..de75ec0b6 100644
--- a/voicevox_engine/dev/core/mock.py
+++ b/voicevox_engine/dev/core/mock.py
@@ -1,6 +1,8 @@
 import json
 from pathlib import Path
-from unittest.mock import Mock
+
+import numpy
+from numpy import ndarray
 
 from ...core_wrapper import CoreWrapper
 
@@ -17,10 +19,6 @@ def __init__(
     ) -> None:
         self.default_sampling_rate = 24000
 
-        self.yukarin_s_forward = Mock()
-        self.yukarin_sa_forward = Mock()
-        self.decode_forward = Mock()
-
     def metas(self) -> str:
         return json.dumps(
             [
@@ -65,6 +63,45 @@ def metas(self) -> str:
             ]
         )
 
+    def yukarin_s_forward(
+        self, length: int, phoneme_list: ndarray, style_id: ndarray
+    ) -> ndarray:
+        """Generate a constant phoneme-length sequence of size `length` (Mock)."""
+        # Mock: `phoneme_list` and `style_id` are not read. [0.1, 0.1, ...]
+        return numpy.full((length,), 0.1, dtype=numpy.float32)
+
+    def yukarin_sa_forward(
+        self,
+        length: int,
+        vowel_phoneme_list: ndarray,
+        consonant_phoneme_list: ndarray,
+        start_accent_list: ndarray,
+        end_accent_list: ndarray,
+        start_accent_phrase_list: ndarray,
+        end_accent_phrase_list: ndarray,
+        style_id: ndarray,
+    ) -> ndarray:
+        """Generate a constant mora pitch sequence of shape (1, length) (Mock)."""
+        assert length > 1, "前後無音を必ず付与しなければならない"
+        # Mock: only `length` is read. Result: [0, 200, 100, 100, ..., 100, 0]
+        pitch = 100 * numpy.ones((1, length), dtype=numpy.float32)
+        pitch[0, 0] = 0.0  # leading silence (pau)
+        pitch[0, 1] = 200.0  # avoids a zero-variance sequence
+        pitch[0, length - 1] = 0.0  # trailing silence (pau); last valid index
+        return pitch
+
+    def decode_forward(
+        self,
+        length: int,
+        phoneme_size: int,
+        f0: ndarray,
+        phoneme: ndarray,
+        style_id: ndarray,
+    ) -> ndarray:
+        """Generate a constant waveform of `length * 256` samples (Mock)."""
+        # Mock: only `length` is read. Result: [0.1, 0.1, ..., 0.1, 0.1]
+        return 0.1 * numpy.ones((length * 256,), dtype=numpy.float32)
+
     def supported_devices(self):
         return json.dumps(
             {

From 742615b3d8df40d7ce386de092af2c527afcd73b Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Tue, 26 Dec 2023 01:54:54 +0900
Subject: [PATCH 071/177] =?UTF-8?q?=E8=BF=BD=E5=8A=A0:=20AquesTalk?=
 =?UTF-8?q?=E9=A2=A8=E8=A8=98=E6=B3=95=E3=81=AE=E5=AE=9A=E7=BE=A9=E7=AF=80?=
 =?UTF-8?q?=20(#820)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Add: AquesTalk風記法の定義節

* 追加: 静的リンク注釈

* Fix: AquesTalk風記法定義節の順序

* fix: typo

* Apply suggestions from code review

* Apply suggestions from code review

* Apply suggestions from code review

---------

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 README.md                                   | 22 ++++++++++++++-------
 run.py                                      |  6 +++---
 voicevox_engine/model.py                    |  2 +-
 voicevox_engine/tts_pipeline/kana_parser.py | 22 ++++++++++-----------
 4 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/README.md b/README.md
index 26c10a38c..2c4da3ff5 100644
--- a/README.md
+++ b/README.md
@@ -73,16 +73,24 @@ curl -s \
 
 `style_id` に指定する値は `/speakers` エンドポイントで得られます。
 
-### 読み方を AquesTalk 風記法で取得・修正するサンプルコード
+### 読み方を AquesTalk 風記法で取得・修正
 
-`/audio_query`のレスポンスにはエンジンが判断した読み方が AquesTalk 風記法([本家の記法](https://www.a-quest.com/archive/manual/siyo_onseikigou.pdf)とは一部異なります)で記録されています。
-記法は次のルールに従います。
+#### AquesTalk 風記法
+<!-- NOTE: この節は静的リンクとして運用中なので変更しない方が良い(voicevox_engine#816) -->
+
+「**AquesTalk 風記法**」はカタカナと記号だけで読み方を指定する記法です。[AquesTalk 本家の記法](https://www.a-quest.com/archive/manual/siyo_onseikigou.pdf)とは一部が異なります。  
+AquesTalk 風記法は次のルールに従います：
 
 - 全てのカナはカタカナで記述される
-- アクセント句は`/`または`、`で区切る。`、`で区切った場合に限り無音区間が挿入される。
-- カナの手前に`_`を入れるとそのカナは無声化される
-- アクセント位置を`'`で指定する。全てのアクセント句にはアクセント位置を 1 つ指定する必要がある。
-- アクセント句末に`？`(全角)を入れることにより疑問文の発音ができる
+- アクセント句は `/` または `、` で区切る。 `、` で区切った場合に限り無音区間が挿入される。
+- カナの手前に `_` を入れるとそのカナは無声化される
+- アクセント位置を `'` で指定する。全てのアクセント句にはアクセント位置を 1 つ指定する必要がある。
+- アクセント句末に `？` (全角)を入れることにより疑問文の発音ができる
+
+#### AquesTalk 風記法のサンプルコード
+
+`/audio_query`のレスポンスにはエンジンが判断した読み方が[AquesTalk 風記法](#aquestalk-風記法)で記述されます。  
+これを修正することで音声の読み仮名やアクセントを制御できます。  
 
 ```bash
 # 読ませたい文章をutf-8でtext.txtに書き出す
diff --git a/run.py b/run.py
index 22eeceb35..b7a9255d8 100644
--- a/run.py
+++ b/run.py
@@ -340,7 +340,7 @@ def accent_phrases(
     ) -> list[AccentPhrase]:
         """
         テキストからアクセント句を得ます。
-        is_kanaが`true`のとき、テキストは次のAquesTalk風記法で解釈されます。デフォルトは`false`です。
+        is_kanaが`true`のとき、テキストは次のAquesTalk 風記法で解釈されます。デフォルトは`false`です。
         * 全てのカナはカタカナで記述される
         * アクセント句は`/`または`、`で区切る。`、`で区切った場合に限り無音区間が挿入される。
         * カナの手前に`_`を入れるとそのカナは無声化される
@@ -1191,7 +1191,7 @@ def engine_manifest() -> EngineManifest:
         "/validate_kana",
         response_model=bool,
         tags=["その他"],
-        summary="テキストがAquesTalk風記法に従っているか判定する",
+        summary="テキストがAquesTalk 風記法に従っているか判定する",
         responses={
             400: {
                 "description": "テキストが不正です",
@@ -1201,7 +1201,7 @@ def engine_manifest() -> EngineManifest:
     )
     def validate_kana(text: str) -> bool:
         """
-        テキストがAquesTalk風記法に従っているかどうかを判定します。
+        テキストがAquesTalk 風記法に従っているかどうかを判定します。
         従っていない場合はエラーが返ります。
 
         Parameters
diff --git a/voicevox_engine/model.py b/voicevox_engine/model.py
index c4d19ba69..d2d7e30f7 100644
--- a/voicevox_engine/model.py
+++ b/voicevox_engine/model.py
@@ -59,7 +59,7 @@ class AudioQuery(BaseModel):
     postPhonemeLength: float = Field(title="音声の後の無音時間")
     outputSamplingRate: int = Field(title="音声データの出力サンプリングレート")
     outputStereo: bool = Field(title="音声データをステレオ出力するか否か")
-    kana: Optional[str] = Field(title="[読み取り専用]AquesTalk風記法によるテキスト。音声合成用のクエリとしては無視される")
+    kana: Optional[str] = Field(title="[読み取り専用]AquesTalk 風記法によるテキスト。音声合成用のクエリとしては無視される")
 
     def __hash__(self):
         items = [
diff --git a/voicevox_engine/tts_pipeline/kana_parser.py b/voicevox_engine/tts_pipeline/kana_parser.py
index 41b32575a..c7367358f 100644
--- a/voicevox_engine/tts_pipeline/kana_parser.py
+++ b/voicevox_engine/tts_pipeline/kana_parser.py
@@ -1,5 +1,5 @@
 """
-「AquesTalk風記法」を実装した AquesTalk風記法テキスト <-> アクセント句系列 変換。
+「AquesTalk 風記法」を実装した AquesTalk 風記法テキスト <-> アクセント句系列 変換。
 
 記法の規則は以下の通り。
 
@@ -11,7 +11,7 @@
 - `？` で疑問文
 - アクセント位置はちょうど１つ
 
-NOTE: ユーザー向け案内 `https://github.com/VOICEVOX/voicevox_engine/blob/master/README.md#読み方を-aquestalk風記法で取得修正するサンプルコード` # noqa
+NOTE: ユーザー向け案内 `https://github.com/VOICEVOX/voicevox_engine/blob/master/README.md#aquestalk-風記法` # noqa
 """
 
 from typing import List, Optional
@@ -21,14 +21,14 @@
 
 _LOOP_LIMIT = 300
 
-# AquesTalk風記法特殊文字
+# AquesTalk 風記法特殊文字
 _UNVOICE_SYMBOL = "_"  # 無声化
 _ACCENT_SYMBOL = "'"  # アクセント位置
 _NOPAUSE_DELIMITER = "/"  # ポーズ無しアクセント句境界
 _PAUSE_DELIMITER = "、"  # ポーズ有りアクセント句境界
 _WIDE_INTERROGATION_MARK = "？"  # 疑問形
 
-# AquesTalk風記法とモーラの対応（音素長・音高 0 初期化）
+# AquesTalk 風記法とモーラの対応（音素長・音高 0 初期化）
 _text2mora_with_unvoice = {}
 for text, (consonant, vowel) in openjtalk_text2mora.items():
     _text2mora_with_unvoice[text] = Mora(
@@ -54,12 +54,12 @@
 
 def _text_to_accent_phrase(phrase: str) -> AccentPhrase:
     """
-    単一アクセント句に相当するAquesTalk風記法テキストからアクセント句オブジェクトを生成
+    単一アクセント句に相当するAquesTalk 風記法テキストからアクセント句オブジェクトを生成
     longest matchによりモーラ化。入力長Nに対し計算量O(N^2)。
     Parameters
     ----------
     phrase : str
-        単一アクセント句に相当するAquesTalk風記法テキスト
+        単一アクセント句に相当するAquesTalk 風記法テキスト
     Returns
     -------
     accent_phrase : AccentPhrase
@@ -118,11 +118,11 @@ def _text_to_accent_phrase(phrase: str) -> AccentPhrase:
 
 def parse_kana(text: str) -> List[AccentPhrase]:
     """
-    AquesTalk風記法テキストからアクセント句系列を生成
+    AquesTalk 風記法テキストからアクセント句系列を生成
     Parameters
     ----------
     text : str
-        AquesTalk風記法テキスト
+        AquesTalk 風記法テキスト
     Returns
     -------
     parsed_results : List[AccentPhrase]
@@ -176,7 +176,7 @@ def parse_kana(text: str) -> List[AccentPhrase]:
 
 def create_kana(accent_phrases: List[AccentPhrase]) -> str:
     """
-    アクセント句系列からAquesTalk風記法テキストを生成
+    アクセント句系列からAquesTalk 風記法テキストを生成
     Parameters
     ----------
     accent_phrases : List[AccentPhrase]
@@ -184,10 +184,10 @@ def create_kana(accent_phrases: List[AccentPhrase]) -> str:
     Returns
     -------
     text : str
-        AquesTalk風記法テキスト
+        AquesTalk 風記法テキスト
     """
     text = ""
-    # アクセント句を先頭から逐次パースし、`text`末尾にAquesTalk風記法の文字を都度追加（ループ）
+    # アクセント句を先頭から逐次パースし、`text`末尾にAquesTalk 風記法の文字を都度追加（ループ）
     for i, phrase in enumerate(accent_phrases):
         for j, mora in enumerate(phrase.moras):
             # 「`_` で無声化」の実装

From 8e322f4dc1632a999379cfce67c68883b796c2c4 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Tue, 26 Dec 2023 02:25:01 +0900
Subject: [PATCH 072/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`.=5Fsynthesis=5F?=
 =?UTF-8?q?impl()`=20=E3=81=AE=E5=BB=83=E6=AD=A2=20(#930)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: `._synthesis_impl()` の廃止

* fix: 移植忘れ
---
 test/test_synthesis_engine_base.py            |  5 +--
 voicevox_engine/dev/synthesis_engine/mock.py  | 25 +++++------
 voicevox_engine/tts_pipeline/tts_engine.py    | 30 ++++++-------
 .../tts_pipeline/tts_engine_base.py           | 43 +------------------
 4 files changed, 27 insertions(+), 76 deletions(-)

diff --git a/test/test_synthesis_engine_base.py b/test/test_synthesis_engine_base.py
index d28048809..3fc5a9f8f 100644
--- a/test/test_synthesis_engine_base.py
+++ b/test/test_synthesis_engine_base.py
@@ -188,10 +188,7 @@ def is_model_loaded(self, style_id):
 class TestTTSEngineBase(TestCase):
     def setUp(self):
         super().setUp()
-        self.synthesis_engine = TTSEngine(
-            core=MockCore(),
-        )
-        self.synthesis_engine._synthesis_impl = Mock()
+        self.synthesis_engine = TTSEngine(core=MockCore())
 
     def create_synthesis_test_base(
         self,
diff --git a/voicevox_engine/dev/synthesis_engine/mock.py b/voicevox_engine/dev/synthesis_engine/mock.py
index c64184e56..4bed5e111 100644
--- a/voicevox_engine/dev/synthesis_engine/mock.py
+++ b/voicevox_engine/dev/synthesis_engine/mock.py
@@ -1,3 +1,4 @@
+import copy
 from logging import getLogger
 from typing import Any, Dict, List
 
@@ -61,22 +62,16 @@ def replace_mora_pitch(
         """
         return accent_phrases
 
-    def _synthesis_impl(self, query: AudioQuery, style_id: int) -> np.ndarray:
-        """
-        synthesis voicevox coreを使わずに、音声合成する [Mock]
-
-        Parameters
-        ----------
-        query : AudioQuery
-            音声合成用のクエリ
-        style_id : int
-            スタイルID
+    def synthesis(
+        self,
+        query: AudioQuery,
+        style_id: int,
+        enable_interrogative_upspeak: bool = True,
+    ) -> np.ndarray:
+        """音声合成用のクエリに含まれる読み仮名に基づいてOpenJTalkで音声波形を生成する (Mock)"""
+        # モーフィング時などに同一参照のqueryで複数回呼ばれる可能性があるので、元の引数のqueryに破壊的変更を行わない
+        query = copy.deepcopy(query)
 
-        Returns
-        -------
-        wave [npt.NDArray[np.int16]]
-            音声波形データをNumPy配列で返します
-        """
         # recall text in katakana
         flatten_moras = to_flatten_moras(query.accent_phrases)
         kana_text = "".join([mora.text for mora in flatten_moras])
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index 838f40a6a..85c3a0721 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -1,3 +1,4 @@
+import copy
 import math
 import threading
 from typing import List, Optional
@@ -9,7 +10,7 @@
 from ..core_wrapper import CoreWrapper, OldCoreError
 from ..model import AccentPhrase, AudioQuery, Mora
 from .acoustic_feature_extractor import Phoneme
-from .tts_engine_base import TTSEngineBase
+from .tts_engine_base import TTSEngineBase, apply_interrogative_upspeak
 
 unvoiced_mora_phoneme_list = ["A", "I", "U", "E", "O", "cl", "pau"]
 mora_phoneme_list = ["a", "i", "u", "e", "o", "N"] + unvoiced_mora_phoneme_list
@@ -495,20 +496,19 @@ def _create_one_hot(accent_phrase: AccentPhrase, position: int):
 
         return accent_phrases
 
-    def _synthesis_impl(self, query: AudioQuery, style_id: int):
-        """
-        音声合成用のクエリから音声合成に必要な情報を構成し、実際に音声合成を行う
-        Parameters
-        ----------
-        query : AudioQuery
-            音声合成用のクエリ
-        style_id : int
-            スタイルID
-        Returns
-        -------
-        wave : numpy.ndarray
-            音声合成結果
-        """
+    def synthesis(
+        self,
+        query: AudioQuery,
+        style_id: int,
+        enable_interrogative_upspeak: bool = True,
+    ) -> ndarray:
+        """音声合成用のクエリ・スタイルID・疑問文語尾自動調整フラグに基づいて音声波形を生成する"""
+        # モーフィング時などに同一参照のqueryで複数回呼ばれる可能性があるので、元の引数のqueryに破壊的変更を行わない
+        query = copy.deepcopy(query)
+        query.accent_phrases = apply_interrogative_upspeak(
+            query.accent_phrases, enable_interrogative_upspeak
+        )
+
         phoneme, f0 = query_to_decoder_feature(query)
         raw_wave, sr_raw_wave = self.core.safe_decode_forward(phoneme, f0, style_id)
         wave = raw_wave_to_output_wave(query, raw_wave, sr_raw_wave)
diff --git a/voicevox_engine/tts_pipeline/tts_engine_base.py b/voicevox_engine/tts_pipeline/tts_engine_base.py
index 5e427d0d3..61c83e283 100644
--- a/voicevox_engine/tts_pipeline/tts_engine_base.py
+++ b/voicevox_engine/tts_pipeline/tts_engine_base.py
@@ -134,46 +134,5 @@ def synthesis(
         style_id: int,
         enable_interrogative_upspeak: bool = True,
     ) -> np.ndarray:
-        """
-        音声合成用のクエリ内の疑問文指定されたMoraを変形した後、
-        継承先における実装`_synthesis_impl`を使い音声合成を行う
-        Parameters
-        ----------
-        query : AudioQuery
-            音声合成用のクエリ
-        style_id : int
-            スタイルID
-        enable_interrogative_upspeak : bool
-            疑問系のテキストの語尾を自動調整する機能を有効にするか
-        Returns
-        -------
-        wave : numpy.ndarray
-            音声合成結果
-        """
-        # モーフィング時などに同一参照のqueryで複数回呼ばれる可能性があるので、元の引数のqueryに破壊的変更を行わない
-        query = copy.deepcopy(query)
-        query.accent_phrases = apply_interrogative_upspeak(
-            query.accent_phrases, enable_interrogative_upspeak
-        )
-        return self._synthesis_impl(query, style_id)
-
-    @abstractmethod
-    def _synthesis_impl(
-        self,
-        query: AudioQuery,
-        style_id: int,
-    ) -> np.ndarray:
-        """
-        音声合成用のクエリから音声合成に必要な情報を構成し、実際に音声合成を行う
-        Parameters
-        ----------
-        query : AudioQuery
-            音声合成用のクエリ
-        style_id : int
-            スタイルID
-        Returns
-        -------
-        wave : numpy.ndarray
-            音声合成結果
-        """
+        """音声合成用のクエリ・スタイルID・疑問文語尾自動調整フラグに基づいて音声波形を生成する"""
         raise NotImplementedError()

From b8b50f21c3923eb3345e327730712eabbabac8d5 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Tue, 26 Dec 2023 21:46:00 +0900
Subject: [PATCH 073/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`core=5Fadapter`?=
 =?UTF-8?q?=20=E3=83=A2=E3=82=B8=E3=83=A5=E3=83=BC=E3=83=AB=E5=88=87?=
 =?UTF-8?q?=E3=82=8A=E5=87=BA=E3=81=97=20(#932)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: `core_adapter` モジュールの切り出し

* fix: lint
---
 run.py                                       |   7 +-
 voicevox_engine/core_adapter.py              | 116 +++++++++++++++++++
 voicevox_engine/dev/synthesis_engine/mock.py |   3 +-
 voicevox_engine/metas/MetasStore.py          |   2 +-
 voicevox_engine/morphing.py                  |   3 +-
 voicevox_engine/tts_pipeline/__init__.py     |   3 +-
 voicevox_engine/tts_pipeline/tts_engine.py   | 114 +-----------------
 7 files changed, 126 insertions(+), 122 deletions(-)
 create mode 100644 voicevox_engine/core_adapter.py

diff --git a/run.py b/run.py
index b7a9255d8..914483540 100644
--- a/run.py
+++ b/run.py
@@ -28,6 +28,7 @@
 
 from voicevox_engine import __version__
 from voicevox_engine.cancellable_engine import CancellableEngine
+from voicevox_engine.core_adapter import CoreAdapter
 from voicevox_engine.engine_manifest import EngineManifestLoader
 from voicevox_engine.engine_manifest.EngineManifest import EngineManifest
 from voicevox_engine.library_manager import LibraryManager
@@ -65,11 +66,7 @@
     Setting,
     SettingLoader,
 )
-from voicevox_engine.tts_pipeline import (
-    CoreAdapter,
-    TTSEngineBase,
-    make_synthesis_engines,
-)
+from voicevox_engine.tts_pipeline import TTSEngineBase, make_synthesis_engines
 from voicevox_engine.tts_pipeline.kana_parser import create_kana, parse_kana
 from voicevox_engine.user_dict import (
     apply_word,
diff --git a/voicevox_engine/core_adapter.py b/voicevox_engine/core_adapter.py
new file mode 100644
index 000000000..e3ce43e24
--- /dev/null
+++ b/voicevox_engine/core_adapter.py
@@ -0,0 +1,116 @@
+import threading
+
+import numpy
+from numpy import ndarray
+
+from .core_wrapper import CoreWrapper, OldCoreError
+
+
+class CoreAdapter:
+    """
+    Adapter around a `CoreWrapper` instance.
+    Also guards model loading and inference calls with a `threading.Lock`.
+    """
+
+    def __init__(self, core: CoreWrapper):
+        super().__init__()
+        self.core = core
+        self.mutex = threading.Lock()
+
+    @property
+    def default_sampling_rate(self) -> int:
+        return self.core.default_sampling_rate
+
+    @property
+    def speakers(self) -> str:
+        """Speaker information (JSON string)"""
+        return self.core.metas()
+
+    @property
+    def supported_devices(self) -> str | None:
+        """Device support information (None: no information available)"""
+        try:
+            supported_devices = self.core.supported_devices()
+        except OldCoreError:
+            supported_devices = None
+        return supported_devices
+
+    def initialize_style_id_synthesis(self, style_id: int, skip_reinit: bool):
+        """
+        Initialize speech synthesis for the given style.
+        Safe to call repeatedly. Does nothing if the core raises `OldCoreError`.
+        Parameters
+        ----------
+        style_id : int
+            Style ID
+        skip_reinit : bool
+            If True, skip reloading a style whose model is already loaded
+        """
+        try:
+            with self.mutex:
+                # Load the model when either of the following holds:
+                # 1. the `skip_reinit` argument is False
+                # 2. the model for this style is not loaded yet
+                if (not skip_reinit) or (not self.core.is_model_loaded(style_id)):
+                    self.core.load_model(style_id)
+        except OldCoreError:
+            pass  # nothing can be done with an old core, so do nothing
+
+    def is_initialized_style_id_synthesis(self, style_id: int) -> bool:
+        """Return whether speech synthesis for the given style is initialized"""
+        try:
+            return self.core.is_model_loaded(style_id)
+        except OldCoreError:
+            return True  # nothing can be done with an old core, so return True
+
+    def safe_yukarin_s_forward(self, phoneme_list_s: ndarray, style_id: int) -> ndarray:
+        # Initializes the style, runs under the mutex, and adapts length/dtype args
+        self.initialize_style_id_synthesis(style_id, skip_reinit=True)
+        with self.mutex:
+            phoneme_length = self.core.yukarin_s_forward(
+                length=len(phoneme_list_s),
+                phoneme_list=phoneme_list_s,
+                style_id=numpy.array(style_id, dtype=numpy.int64).reshape(-1),
+            )
+        return phoneme_length
+
+    def safe_yukarin_sa_forward(
+        self,
+        vowel_phoneme_list: ndarray,
+        consonant_phoneme_list: ndarray,
+        start_accent_list: ndarray,
+        end_accent_list: ndarray,
+        start_accent_phrase_list: ndarray,
+        end_accent_phrase_list: ndarray,
+        style_id: int,
+    ) -> ndarray:
+        # Initializes the style, runs under the mutex, and adapts length/dtype args
+        self.initialize_style_id_synthesis(style_id, skip_reinit=True)
+        with self.mutex:
+            f0_list = self.core.yukarin_sa_forward(
+                length=vowel_phoneme_list.shape[0],
+                vowel_phoneme_list=vowel_phoneme_list[numpy.newaxis],
+                consonant_phoneme_list=consonant_phoneme_list[numpy.newaxis],
+                start_accent_list=start_accent_list[numpy.newaxis],
+                end_accent_list=end_accent_list[numpy.newaxis],
+                start_accent_phrase_list=start_accent_phrase_list[numpy.newaxis],
+                end_accent_phrase_list=end_accent_phrase_list[numpy.newaxis],
+                style_id=numpy.array(style_id, dtype=numpy.int64).reshape(-1),
+            )[0]
+        return f0_list
+
+    def safe_decode_forward(
+        self, phoneme: ndarray, f0: ndarray, style_id: int
+    ) -> tuple[ndarray, int]:
+        # Initializes the style, runs under the mutex, and adapts length/dtype args
+        self.initialize_style_id_synthesis(style_id, skip_reinit=True)
+        with self.mutex:
+            wave = self.core.decode_forward(
+                length=phoneme.shape[0],
+                phoneme_size=phoneme.shape[1],
+                f0=f0[:, numpy.newaxis],
+                phoneme=phoneme,
+                style_id=numpy.array(style_id, dtype=numpy.int64).reshape(-1),
+            )
+        sr_wave = self.default_sampling_rate
+        return wave, sr_wave
diff --git a/voicevox_engine/dev/synthesis_engine/mock.py b/voicevox_engine/dev/synthesis_engine/mock.py
index 4bed5e111..4d2adb79d 100644
--- a/voicevox_engine/dev/synthesis_engine/mock.py
+++ b/voicevox_engine/dev/synthesis_engine/mock.py
@@ -6,10 +6,11 @@
 from pyopenjtalk import tts
 from soxr import resample
 
+from ...core_adapter import CoreAdapter
 from ...core_wrapper import CoreWrapper
 from ...model import AccentPhrase, AudioQuery
 from ...tts_pipeline import TTSEngineBase
-from ...tts_pipeline.tts_engine import CoreAdapter, to_flatten_moras
+from ...tts_pipeline.tts_engine import to_flatten_moras
 
 
 class MockTTSEngine(TTSEngineBase):
diff --git a/voicevox_engine/metas/MetasStore.py b/voicevox_engine/metas/MetasStore.py
index d24232d50..efed15353 100644
--- a/voicevox_engine/metas/MetasStore.py
+++ b/voicevox_engine/metas/MetasStore.py
@@ -5,7 +5,7 @@
 from voicevox_engine.metas.Metas import CoreSpeaker, EngineSpeaker, Speaker, StyleInfo
 
 if TYPE_CHECKING:
-    from voicevox_engine.tts_pipeline.tts_engine import CoreAdapter
+    from voicevox_engine.core_adapter import CoreAdapter
 
 
 class MetasStore:
diff --git a/voicevox_engine/morphing.py b/voicevox_engine/morphing.py
index 6b7a6d0dd..6f456df0d 100644
--- a/voicevox_engine/morphing.py
+++ b/voicevox_engine/morphing.py
@@ -7,10 +7,11 @@
 import pyworld as pw
 from soxr import resample
 
+from .core_adapter import CoreAdapter
 from .metas.Metas import Speaker, SpeakerSupportPermittedSynthesisMorphing, StyleInfo
 from .metas.MetasStore import construct_lookup
 from .model import AudioQuery, MorphableTargetInfo, StyleIdNotFoundError
-from .tts_pipeline import CoreAdapter, TTSEngine
+from .tts_pipeline import TTSEngine
 
 
 # FIXME: ndarray type hint, https://github.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder/blob/2b64f86197573497c685c785c6e0e743f407b63e/pyworld/pyworld.pyx#L398  # noqa
diff --git a/voicevox_engine/tts_pipeline/__init__.py b/voicevox_engine/tts_pipeline/__init__.py
index 5ccc2f800..8aeea1b06 100644
--- a/voicevox_engine/tts_pipeline/__init__.py
+++ b/voicevox_engine/tts_pipeline/__init__.py
@@ -1,11 +1,10 @@
 from ..core_wrapper import CoreWrapper, load_runtime_lib
 from .make_tts_engines import make_synthesis_engines
-from .tts_engine import CoreAdapter, TTSEngine
+from .tts_engine import TTSEngine
 from .tts_engine_base import TTSEngineBase
 
 __all__ = [
     "CoreWrapper",
-    "CoreAdapter",
     "load_runtime_lib",
     "make_synthesis_engines",
     "TTSEngine",
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index 85c3a0721..ae6ea6de1 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -1,13 +1,13 @@
 import copy
 import math
-import threading
 from typing import List, Optional
 
 import numpy
 from numpy import ndarray
 from soxr import resample
 
-from ..core_wrapper import CoreWrapper, OldCoreError
+from ..core_adapter import CoreAdapter
+from ..core_wrapper import CoreWrapper
 from ..model import AccentPhrase, AudioQuery, Mora
 from .acoustic_feature_extractor import Phoneme
 from .tts_engine_base import TTSEngineBase, apply_interrogative_upspeak
@@ -210,116 +210,6 @@ def raw_wave_to_output_wave(query: AudioQuery, wave: ndarray, sr_wave: int) -> n
     return wave
 
 
-class CoreAdapter:
-    """
-    コアのアダプター。
-    ついでにコア内部で推論している処理をプロセスセーフにする。
-    """
-
-    def __init__(self, core: CoreWrapper):
-        super().__init__()
-        self.core = core
-        self.mutex = threading.Lock()
-
-    @property
-    def default_sampling_rate(self) -> int:
-        return self.core.default_sampling_rate
-
-    @property
-    def speakers(self) -> str:
-        """話者情報（json文字列）"""
-        return self.core.metas()
-
-    @property
-    def supported_devices(self) -> str | None:
-        """デバイスサポート情報（None: 情報無し）"""
-        try:
-            supported_devices = self.core.supported_devices()
-        except OldCoreError:
-            supported_devices = None
-        return supported_devices
-
-    def initialize_style_id_synthesis(self, style_id: int, skip_reinit: bool):
-        """
-        指定したスタイルでの音声合成を初期化する。
-        何度も実行可能。未実装の場合は何もしない。
-        Parameters
-        ----------
-        style_id : int
-            スタイルID
-        skip_reinit : bool
-            True の場合, 既に初期化済みの話者の再初期化をスキップします
-        """
-        try:
-            with self.mutex:
-                # 以下の条件のいずれかを満たす場合, 初期化を実行する
-                # 1. 引数 skip_reinit が False の場合
-                # 2. 話者が初期化されていない場合
-                if (not skip_reinit) or (not self.core.is_model_loaded(style_id)):
-                    self.core.load_model(style_id)
-        except OldCoreError:
-            pass  # コアが古い場合はどうしようもないので何もしない
-
-    def is_initialized_style_id_synthesis(self, style_id: int) -> bool:
-        """指定したスタイルでの音声合成が初期化されているかどうかを返す"""
-        try:
-            return self.core.is_model_loaded(style_id)
-        except OldCoreError:
-            return True  # コアが古い場合はどうしようもないのでTrueを返す
-
-    def safe_yukarin_s_forward(self, phoneme_list_s: ndarray, style_id: int) -> ndarray:
-        # 「指定スタイルを初期化」「mutexによる安全性」「系列長・データ型に関するアダプター」を提供する
-        self.initialize_style_id_synthesis(style_id, skip_reinit=True)
-        with self.mutex:
-            phoneme_length = self.core.yukarin_s_forward(
-                length=len(phoneme_list_s),
-                phoneme_list=phoneme_list_s,
-                style_id=numpy.array(style_id, dtype=numpy.int64).reshape(-1),
-            )
-        return phoneme_length
-
-    def safe_yukarin_sa_forward(
-        self,
-        vowel_phoneme_list: ndarray,
-        consonant_phoneme_list: ndarray,
-        start_accent_list: ndarray,
-        end_accent_list: ndarray,
-        start_accent_phrase_list: ndarray,
-        end_accent_phrase_list: ndarray,
-        style_id: int,
-    ) -> ndarray:
-        # 「指定スタイルを初期化」「mutexによる安全性」「系列長・データ型に関するアダプター」を提供する
-        self.initialize_style_id_synthesis(style_id, skip_reinit=True)
-        with self.mutex:
-            f0_list = self.core.yukarin_sa_forward(
-                length=vowel_phoneme_list.shape[0],
-                vowel_phoneme_list=vowel_phoneme_list[numpy.newaxis],
-                consonant_phoneme_list=consonant_phoneme_list[numpy.newaxis],
-                start_accent_list=start_accent_list[numpy.newaxis],
-                end_accent_list=end_accent_list[numpy.newaxis],
-                start_accent_phrase_list=start_accent_phrase_list[numpy.newaxis],
-                end_accent_phrase_list=end_accent_phrase_list[numpy.newaxis],
-                style_id=numpy.array(style_id, dtype=numpy.int64).reshape(-1),
-            )[0]
-        return f0_list
-
-    def safe_decode_forward(
-        self, phoneme: ndarray, f0: ndarray, style_id: int
-    ) -> tuple[ndarray, int]:
-        # 「指定スタイルを初期化」「mutexによる安全性」「系列長・データ型に関するアダプター」を提供する
-        self.initialize_style_id_synthesis(style_id, skip_reinit=True)
-        with self.mutex:
-            wave = self.core.decode_forward(
-                length=phoneme.shape[0],
-                phoneme_size=phoneme.shape[1],
-                f0=f0[:, numpy.newaxis],
-                phoneme=phoneme,
-                style_id=numpy.array(style_id, dtype=numpy.int64).reshape(-1),
-            )
-        sr_wave = self.default_sampling_rate
-        return wave, sr_wave
-
-
 class TTSEngine(TTSEngineBase):
     """音声合成器（core）の管理/実行/プロキシと音声合成フロー"""
 

From 9e29060e9772d5de585a1b20e8ada04527b10c94 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Tue, 26 Dec 2023 21:58:26 +0900
Subject: [PATCH 074/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`apply=5Finterrog?=
 =?UTF-8?q?ative=5Fupspeak()`=20=E7=A7=BB=E6=A4=8D=20(#933)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: `apply_interrogative_upspeak()` 移植

* Apply suggestions from code review

---------

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 test/test_synthesis_engine_base.py            |  4 ++-
 voicevox_engine/tts_pipeline/tts_engine.py    | 35 +++++++++++++++++-
 .../tts_pipeline/tts_engine_base.py           | 36 +------------------
 3 files changed, 38 insertions(+), 37 deletions(-)

diff --git a/test/test_synthesis_engine_base.py b/test/test_synthesis_engine_base.py
index 3fc5a9f8f..a3fddd31b 100644
--- a/test/test_synthesis_engine_base.py
+++ b/test/test_synthesis_engine_base.py
@@ -6,7 +6,9 @@
 
 from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
 from voicevox_engine.tts_pipeline import TTSEngine
-from voicevox_engine.tts_pipeline.tts_engine_base import apply_interrogative_upspeak
+from voicevox_engine.tts_pipeline.tts_engine import (
+    apply_interrogative_upspeak,  # FIXME: この関数を使うテストをTTSEngine用のテストに移動する
+)
 
 
 def yukarin_s_mock(length: int, phoneme_list: numpy.ndarray, style_id: numpy.ndarray):
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index ae6ea6de1..fc1af32a6 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -10,11 +10,17 @@
 from ..core_wrapper import CoreWrapper
 from ..model import AccentPhrase, AudioQuery, Mora
 from .acoustic_feature_extractor import Phoneme
-from .tts_engine_base import TTSEngineBase, apply_interrogative_upspeak
+from .mora_list import openjtalk_mora2text
+from .tts_engine_base import TTSEngineBase
 
 unvoiced_mora_phoneme_list = ["A", "I", "U", "E", "O", "cl", "pau"]
 mora_phoneme_list = ["a", "i", "u", "e", "o", "N"] + unvoiced_mora_phoneme_list
 
+# 疑問文語尾定数
+UPSPEAK_LENGTH = 0.15
+UPSPEAK_PITCH_ADD = 0.3
+UPSPEAK_PITCH_MAX = 6.5
+
 
 # TODO: move mora utility to mora module
 def to_flatten_moras(accent_phrases: list[AccentPhrase]) -> list[Mora]:
@@ -83,6 +89,33 @@ def generate_silence_mora(length: float) -> Mora:
     return Mora(text="　", vowel="sil", vowel_length=length, pitch=0.0)
 
 
+def apply_interrogative_upspeak(
+    accent_phrases: list[AccentPhrase], enable_interrogative_upspeak: bool
+) -> list[AccentPhrase]:
+    """必要に応じて各アクセント句の末尾へ疑問形モーラ（同一母音・継続長 0.15秒・音高↑）を付与する"""
+    # NOTE: 将来的にAudioQueryインスタンスを引数にする予定
+    if not enable_interrogative_upspeak:
+        return accent_phrases
+
+    for accent_phrase in accent_phrases:
+        moras = accent_phrase.moras
+        if len(moras) == 0:
+            continue
+        # 疑問形補正条件: 疑問形アクセント句 & 末尾有声モーラ
+        if accent_phrase.is_interrogative and moras[-1].pitch > 0:
+            last_mora = copy.deepcopy(moras[-1])
+            upspeak_mora = Mora(
+                text=openjtalk_mora2text[last_mora.vowel],
+                consonant=None,
+                consonant_length=None,
+                vowel=last_mora.vowel,
+                vowel_length=UPSPEAK_LENGTH,
+                pitch=min(last_mora.pitch + UPSPEAK_PITCH_ADD, UPSPEAK_PITCH_MAX),
+            )
+            accent_phrase.moras += [upspeak_mora]
+    return accent_phrases
+
+
 def apply_prepost_silence(moras: list[Mora], query: AudioQuery) -> list[Mora]:
     """モーラ系列へ音声合成用のクエリがもつ前後無音（`prePhonemeLength` & `postPhonemeLength`）を付加する"""
     pre_silence_moras = [generate_silence_mora(query.prePhonemeLength)]
diff --git a/voicevox_engine/tts_pipeline/tts_engine_base.py b/voicevox_engine/tts_pipeline/tts_engine_base.py
index 61c83e283..42cafa4ab 100644
--- a/voicevox_engine/tts_pipeline/tts_engine_base.py
+++ b/voicevox_engine/tts_pipeline/tts_engine_base.py
@@ -1,44 +1,10 @@
-import copy
 from abc import ABCMeta, abstractmethod
 from typing import List
 
 import numpy as np
 
-from ..model import AccentPhrase, AudioQuery, Mora
+from ..model import AccentPhrase, AudioQuery
 from .full_context_label import text_to_accent_phrases
-from .mora_list import openjtalk_mora2text
-
-# 疑問文語尾定数
-UPSPEAK_LENGTH = 0.15
-UPSPEAK_PITCH_ADD = 0.3
-UPSPEAK_PITCH_MAX = 6.5
-
-
-def apply_interrogative_upspeak(
-    accent_phrases: list[AccentPhrase], enable_interrogative_upspeak: bool
-) -> list[AccentPhrase]:
-    """必要に応じて各アクセント句の末尾へ疑問形モーラ（同一母音・継続長 0.15秒・音高↑）を付与する"""
-    # NOTE: 将来的にAudioQueryインスタンスを引数にする予定
-    if not enable_interrogative_upspeak:
-        return accent_phrases
-
-    for accent_phrase in accent_phrases:
-        moras = accent_phrase.moras
-        if len(moras) == 0:
-            continue
-        # 疑問形補正条件: 疑問形アクセント句 & 末尾有声モーラ
-        if accent_phrase.is_interrogative and moras[-1].pitch > 0:
-            last_mora = copy.deepcopy(moras[-1])
-            upspeak_mora = Mora(
-                text=openjtalk_mora2text[last_mora.vowel],
-                consonant=None,
-                consonant_length=None,
-                vowel=last_mora.vowel,
-                vowel_length=UPSPEAK_LENGTH,
-                pitch=min(last_mora.pitch + UPSPEAK_PITCH_ADD, UPSPEAK_PITCH_MAX),
-            )
-            accent_phrase.moras += [upspeak_mora]
-    return accent_phrases
 
 
 class TTSEngineBase(metaclass=ABCMeta):

From 6e965dc503f556675fcd59a7f8262d7888402155 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Tue, 26 Dec 2023 22:13:48 +0900
Subject: [PATCH 075/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20TTS=E3=82=B5?=
 =?UTF-8?q?=E3=83=96=E3=83=A2=E3=82=B8=E3=83=A5=E3=83=BC=E3=83=AB=E3=83=AA?=
 =?UTF-8?q?=E3=83=8D=E3=83=BC=E3=83=A0=20(#934)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: `full_context_label` リネーム

* refactor: `kana_parser` リネーム

* ミス修正

---------

Co-authored-by: Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp>
Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 run.py                                                 |  2 +-
 test/{test_kana_parser.py => test_kana_converter.py}   | 10 +++++-----
 test/test_mock_synthesis_engine.py                     |  2 +-
 test/test_mora_to_text.py                              |  2 +-
 ...est_full_context_label.py => test_text_analyzer.py} |  2 +-
 test/test_user_dict_model.py                           |  2 +-
 .../tts_pipeline/{kana_parser.py => kana_converter.py} |  0
 .../{full_context_label.py => text_analyzer.py}        |  0
 voicevox_engine/tts_pipeline/tts_engine_base.py        |  2 +-
 9 files changed, 11 insertions(+), 11 deletions(-)
 rename test/{test_kana_parser.py => test_kana_converter.py} (98%)
 rename test/{test_full_context_label.py => test_text_analyzer.py} (99%)
 rename voicevox_engine/tts_pipeline/{kana_parser.py => kana_converter.py} (100%)
 rename voicevox_engine/tts_pipeline/{full_context_label.py => text_analyzer.py} (100%)

diff --git a/run.py b/run.py
index 914483540..e752c3409 100644
--- a/run.py
+++ b/run.py
@@ -67,7 +67,7 @@
     SettingLoader,
 )
 from voicevox_engine.tts_pipeline import TTSEngineBase, make_synthesis_engines
-from voicevox_engine.tts_pipeline.kana_parser import create_kana, parse_kana
+from voicevox_engine.tts_pipeline.kana_converter import create_kana, parse_kana
 from voicevox_engine.user_dict import (
     apply_word,
     delete_word,
diff --git a/test/test_kana_parser.py b/test/test_kana_converter.py
similarity index 98%
rename from test/test_kana_parser.py
rename to test/test_kana_converter.py
index 3e4c19a97..02fa6bdb5 100644
--- a/test/test_kana_parser.py
+++ b/test/test_kana_converter.py
@@ -2,12 +2,12 @@
 from unittest import TestCase
 
 from voicevox_engine.model import AccentPhrase, Mora, ParseKanaError, ParseKanaErrorCode
-from voicevox_engine.tts_pipeline import kana_parser
-from voicevox_engine.tts_pipeline.kana_parser import create_kana
+from voicevox_engine.tts_pipeline import kana_converter
+from voicevox_engine.tts_pipeline.kana_converter import create_kana
 
 
 def parse_kana(text: str) -> List[AccentPhrase]:
-    accent_phrases = kana_parser.parse_kana(text)
+    accent_phrases = kana_converter.parse_kana(text)
     return accent_phrases
 
 
@@ -59,7 +59,7 @@ def test_roundtrip(self):
     def _accent_phrase_marks_base(
         self, text: str, expected_accent_phrases: List[AccentPhrase]
     ) -> None:
-        accent_phrases = kana_parser.parse_kana(text)
+        accent_phrases = kana_converter.parse_kana(text)
         self.assertEqual(expected_accent_phrases, accent_phrases)
 
     def test_accent_phrase_marks(self):
@@ -556,7 +556,7 @@ def test_exceptions(self):
         self.assertEqual(err.exception.kwargs, {"position": "2"})
 
         with self.assertRaises(ParseKanaError) as err:
-            kana_parser.parse_kana("ア？ア'")
+            kana_converter.parse_kana("ア？ア'")
         self.assertEqual(
             err.exception.errcode, ParseKanaErrorCode.INTERROGATION_MARK_NOT_AT_END
         )
diff --git a/test/test_mock_synthesis_engine.py b/test/test_mock_synthesis_engine.py
index 7b4bcffae..820be6cc9 100644
--- a/test/test_mock_synthesis_engine.py
+++ b/test/test_mock_synthesis_engine.py
@@ -3,7 +3,7 @@
 from voicevox_engine.dev.core import MockCoreWrapper
 from voicevox_engine.dev.synthesis_engine import MockTTSEngine
 from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
-from voicevox_engine.tts_pipeline.kana_parser import create_kana
+from voicevox_engine.tts_pipeline.kana_converter import create_kana
 
 
 class TestMockTTSEngine(TestCase):
diff --git a/test/test_mora_to_text.py b/test/test_mora_to_text.py
index 279953379..c2b122d8e 100644
--- a/test/test_mora_to_text.py
+++ b/test/test_mora_to_text.py
@@ -1,7 +1,7 @@
 from unittest import TestCase
 
 # TODO: import from voicevox_engine.synthesis_engine.mora
-from voicevox_engine.tts_pipeline.full_context_label import mora_to_text
+from voicevox_engine.tts_pipeline.text_analyzer import mora_to_text
 
 
 class TestMoraToText(TestCase):
diff --git a/test/test_full_context_label.py b/test/test_text_analyzer.py
similarity index 99%
rename from test/test_full_context_label.py
rename to test/test_text_analyzer.py
index 76506ff46..4de4afad6 100644
--- a/test/test_full_context_label.py
+++ b/test/test_text_analyzer.py
@@ -1,6 +1,6 @@
 from unittest import TestCase
 
-from voicevox_engine.tts_pipeline.full_context_label import (
+from voicevox_engine.tts_pipeline.text_analyzer import (
     AccentPhraseLabel,
     BreathGroupLabel,
     Label,
diff --git a/test/test_user_dict_model.py b/test/test_user_dict_model.py
index 646340c6c..823310157 100644
--- a/test/test_user_dict_model.py
+++ b/test/test_user_dict_model.py
@@ -4,7 +4,7 @@
 from pydantic import ValidationError
 
 from voicevox_engine.model import UserDictWord
-from voicevox_engine.tts_pipeline.kana_parser import parse_kana
+from voicevox_engine.tts_pipeline.kana_converter import parse_kana
 
 
 class TestUserDictWords(TestCase):
diff --git a/voicevox_engine/tts_pipeline/kana_parser.py b/voicevox_engine/tts_pipeline/kana_converter.py
similarity index 100%
rename from voicevox_engine/tts_pipeline/kana_parser.py
rename to voicevox_engine/tts_pipeline/kana_converter.py
diff --git a/voicevox_engine/tts_pipeline/full_context_label.py b/voicevox_engine/tts_pipeline/text_analyzer.py
similarity index 100%
rename from voicevox_engine/tts_pipeline/full_context_label.py
rename to voicevox_engine/tts_pipeline/text_analyzer.py
diff --git a/voicevox_engine/tts_pipeline/tts_engine_base.py b/voicevox_engine/tts_pipeline/tts_engine_base.py
index 42cafa4ab..77c486056 100644
--- a/voicevox_engine/tts_pipeline/tts_engine_base.py
+++ b/voicevox_engine/tts_pipeline/tts_engine_base.py
@@ -4,7 +4,7 @@
 import numpy as np
 
 from ..model import AccentPhrase, AudioQuery
-from .full_context_label import text_to_accent_phrases
+from .text_analyzer import text_to_accent_phrases
 
 
 class TTSEngineBase(metaclass=ABCMeta):

From 7845b046edaeb02f93ede4a0172c13c1e3fe322d Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 27 Dec 2023 01:38:41 +0900
Subject: [PATCH 076/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E5=88=9D?=
 =?UTF-8?q?=E6=9C=9F=E5=8C=96=E6=99=82=E3=81=AE=20`cores`=20=E5=8F=96?=
 =?UTF-8?q?=E5=BE=97=20(#929)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: coresの取得

* fix: リネーム
---
 build_util/make_docs.py                       |  5 +++-
 run.py                                        |  6 +++--
 test/e2e/conftest.py                          |  5 ++--
 voicevox_engine/cancellable_engine.py         |  4 +--
 voicevox_engine/tts_pipeline/__init__.py      |  4 +--
 .../tts_pipeline/make_tts_engines.py          | 25 +++++++++++--------
 6 files changed, 30 insertions(+), 19 deletions(-)

diff --git a/build_util/make_docs.py b/build_util/make_docs.py
index c9f403654..fb0ac36a0 100644
--- a/build_util/make_docs.py
+++ b/build_util/make_docs.py
@@ -5,6 +5,7 @@
 from voicevox_engine.dev.synthesis_engine.mock import MockTTSEngine
 from voicevox_engine.preset import PresetManager
 from voicevox_engine.setting import USER_SETTING_PATH, SettingLoader
+from voicevox_engine.tts_pipeline.tts_engine import CoreAdapter
 from voicevox_engine.utility import engine_root
 
 
@@ -35,9 +36,11 @@ def generate_api_docs_html(schema: str) -> str:
 
     import run
 
+    mock_core = MockCoreWrapper()
     # FastAPI の機能を用いて OpenAPI schema を生成する
     app = run.generate_app(
-        synthesis_engines={"mock": MockTTSEngine(MockCoreWrapper())},
+        synthesis_engines={"mock": MockTTSEngine(mock_core)},
+        cores={"mock": CoreAdapter(mock_core)},
         latest_core_version="mock",
         setting_loader=SettingLoader(USER_SETTING_PATH),
         preset_manager=PresetManager(  # FIXME: impl MockPresetManager
diff --git a/run.py b/run.py
index e752c3409..d35717d8b 100644
--- a/run.py
+++ b/run.py
@@ -66,7 +66,7 @@
     Setting,
     SettingLoader,
 )
-from voicevox_engine.tts_pipeline import TTSEngineBase, make_synthesis_engines
+from voicevox_engine.tts_pipeline import TTSEngineBase, make_synthesis_engines_and_cores
 from voicevox_engine.tts_pipeline.kana_converter import create_kana, parse_kana
 from voicevox_engine.user_dict import (
     apply_word,
@@ -133,6 +133,7 @@ def set_output_log_utf8() -> None:
 
 def generate_app(
     synthesis_engines: Dict[str, TTSEngineBase],
+    cores: Dict[str, CoreAdapter],  # NOTE: synthesis_engines の機能を一部代替予定
     latest_core_version: str,
     setting_loader: SettingLoader,
     preset_manager: PresetManager,
@@ -1421,7 +1422,7 @@ def main() -> None:
     cpu_num_threads: int | None = args.cpu_num_threads
     load_all_models: bool = args.load_all_models
 
-    synthesis_engines = make_synthesis_engines(
+    synthesis_engines, cores = make_synthesis_engines_and_cores(
         use_gpu=use_gpu,
         voicelib_dirs=voicelib_dirs,
         voicevox_dir=voicevox_dir,
@@ -1488,6 +1489,7 @@ def main() -> None:
     uvicorn.run(
         generate_app(
             synthesis_engines,
+            cores,
             latest_core_version,
             setting_loader,
             preset_manager=preset_manager,
diff --git a/test/e2e/conftest.py b/test/e2e/conftest.py
index 32da2a156..3f2b158e8 100644
--- a/test/e2e/conftest.py
+++ b/test/e2e/conftest.py
@@ -8,7 +8,7 @@
 
 from voicevox_engine.preset import PresetManager
 from voicevox_engine.setting import SettingLoader
-from voicevox_engine.tts_pipeline import make_synthesis_engines
+from voicevox_engine.tts_pipeline import make_synthesis_engines_and_cores
 from voicevox_engine.utility.core_version_utility import get_latest_core_version
 
 
@@ -27,7 +27,7 @@ def snapshot_json(snapshot: SnapshotAssertion):
 
 @pytest.fixture(scope="session")
 def client():
-    synthesis_engines = make_synthesis_engines(use_gpu=False)
+    synthesis_engines, cores = make_synthesis_engines_and_cores(use_gpu=False)
     latest_core_version = get_latest_core_version(versions=synthesis_engines.keys())
     setting_loader = SettingLoader(Path("./not_exist.yaml"))
     preset_manager = PresetManager(  # FIXME: impl MockPresetManager
@@ -37,6 +37,7 @@ def client():
     return TestClient(
         generate_app(
             synthesis_engines=synthesis_engines,
+            cores=cores,
             latest_core_version=latest_core_version,
             setting_loader=setting_loader,
             preset_manager=preset_manager,
diff --git a/voicevox_engine/cancellable_engine.py b/voicevox_engine/cancellable_engine.py
index 32bf29555..8de52cb3f 100644
--- a/voicevox_engine/cancellable_engine.py
+++ b/voicevox_engine/cancellable_engine.py
@@ -17,7 +17,7 @@
 from fastapi import HTTPException, Request
 
 from .model import AudioQuery
-from .tts_pipeline import make_synthesis_engines
+from .tts_pipeline import make_synthesis_engines_and_cores
 from .utility import get_latest_core_version
 
 
@@ -219,7 +219,7 @@ def start_synthesis_subprocess(
         メインプロセスと通信するためのPipe
     """
 
-    synthesis_engines = make_synthesis_engines(
+    synthesis_engines, _ = make_synthesis_engines_and_cores(
         use_gpu=use_gpu,
         voicelib_dirs=voicelib_dirs,
         voicevox_dir=voicevox_dir,
diff --git a/voicevox_engine/tts_pipeline/__init__.py b/voicevox_engine/tts_pipeline/__init__.py
index 8aeea1b06..336416982 100644
--- a/voicevox_engine/tts_pipeline/__init__.py
+++ b/voicevox_engine/tts_pipeline/__init__.py
@@ -1,12 +1,12 @@
 from ..core_wrapper import CoreWrapper, load_runtime_lib
-from .make_tts_engines import make_synthesis_engines
+from .make_tts_engines import make_synthesis_engines_and_cores
 from .tts_engine import TTSEngine
 from .tts_engine_base import TTSEngineBase
 
 __all__ = [
     "CoreWrapper",
     "load_runtime_lib",
-    "make_synthesis_engines",
+    "make_synthesis_engines_and_cores",
     "TTSEngine",
     "TTSEngineBase",
 ]
diff --git a/voicevox_engine/tts_pipeline/make_tts_engines.py b/voicevox_engine/tts_pipeline/make_tts_engines.py
index 6a3af9352..a282b8f3f 100644
--- a/voicevox_engine/tts_pipeline/make_tts_engines.py
+++ b/voicevox_engine/tts_pipeline/make_tts_engines.py
@@ -1,14 +1,14 @@
 import json
 import sys
 from pathlib import Path
-from typing import Dict, List, Optional
+from typing import List, Optional
 
 from ..core_wrapper import CoreWrapper, load_runtime_lib
 from ..utility import engine_root, get_save_dir
-from .tts_engine import TTSEngine, TTSEngineBase
+from .tts_engine import CoreAdapter, TTSEngine, TTSEngineBase
 
 
-def make_synthesis_engines(
+def make_synthesis_engines_and_cores(
     use_gpu: bool,
     voicelib_dirs: Optional[List[Path]] = None,
     voicevox_dir: Optional[Path] = None,
@@ -16,7 +16,7 @@ def make_synthesis_engines(
     cpu_num_threads: Optional[int] = None,
     enable_mock: bool = True,
     load_all_models: bool = False,
-) -> Dict[str, TTSEngineBase]:
+) -> tuple[dict[str, TTSEngineBase], dict[str, CoreAdapter]]:
     """
     音声ライブラリをロードして、音声合成エンジンを生成
 
@@ -72,8 +72,9 @@ def make_synthesis_engines(
     # ランタイムをロードする
     load_runtime_lib(runtime_dirs)
 
-    # コアをロードし `synthesis_engines` へ登録する
-    synthesis_engines = {}
+    # コアをロードし `cores` と `synthesis_engines` へ登録する
+    cores: dict[str, CoreAdapter] = {}
+    synthesis_engines: dict[str, TTSEngineBase] = {}
 
     if not enable_mock:
 
@@ -101,7 +102,8 @@ def load_core_library(core_dir: Path, suppress_error: bool = False):
                         file=sys.stderr,
                     )
                 else:
-                    synthesis_engines[core_version] = TTSEngine(core=core)
+                    cores[core_version] = CoreAdapter(core)
+                    synthesis_engines[core_version] = TTSEngine(core)
             except Exception:
                 # コアでなかった場合のエラーを抑制する
                 if not suppress_error:
@@ -130,8 +132,11 @@ def load_core_library(core_dir: Path, suppress_error: bool = False):
         from ..dev.core import MockCoreWrapper
         from ..dev.synthesis_engine import MockTTSEngine
 
-        if "0.0.0" not in synthesis_engines:
+        mock_ver = "0.0.0"
+        if mock_ver not in synthesis_engines:
             print("Info: Loading mock.")
-            synthesis_engines["0.0.0"] = MockTTSEngine(MockCoreWrapper())
+            core = MockCoreWrapper()
+            cores[mock_ver] = CoreAdapter(core)
+            synthesis_engines[mock_ver] = MockTTSEngine(core)
 
-    return synthesis_engines
+    return synthesis_engines, cores

From 4801a963e08b50c0734036015d5ceb4c09afcd1a Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Wed, 27 Dec 2023 01:54:14 +0900
Subject: [PATCH 077/177] =?UTF-8?q?[docs]=20=E3=82=B9=E3=83=94=E3=83=BC?=
 =?UTF-8?q?=E3=82=AB=E3=83=BC=E2=86=92=E8=A9=B1=E8=80=85=20(#827)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* スピーカー→話者

* 話者スタイル→スタイル
---
 voicevox_engine/metas/Metas.py   | 16 ++++++++--------
 voicevox_engine/preset/Preset.py |  2 +-
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/voicevox_engine/metas/Metas.py b/voicevox_engine/metas/Metas.py
index 58c42f067..4eb1e0a46 100644
--- a/voicevox_engine/metas/Metas.py
+++ b/voicevox_engine/metas/Metas.py
@@ -6,7 +6,7 @@
 
 class SpeakerStyle(BaseModel):
     """
-    スピーカーのスタイル情報
+    話者のスタイル情報
     """
 
     name: str = Field(title="スタイル名")
@@ -35,28 +35,28 @@ class SpeakerSupportedFeatures(BaseModel):
 
 class CoreSpeaker(BaseModel):
     """
-    コアに含まれるスピーカー情報
+    コアに含まれる話者情報
     """
 
     name: str = Field(title="名前")
-    speaker_uuid: str = Field(title="スピーカーのUUID")
-    styles: List[SpeakerStyle] = Field(title="スピーカースタイルの一覧")
-    version: str = Field("スピーカーのバージョン")
+    speaker_uuid: str = Field(title="話者のUUID")
+    styles: List[SpeakerStyle] = Field(title="スタイルの一覧")
+    version: str = Field("話者のバージョン")
 
 
 class EngineSpeaker(BaseModel):
     """
-    エンジンに含まれるスピーカー情報
+    エンジンに含まれる話者情報
     """
 
     supported_features: SpeakerSupportedFeatures = Field(
-        title="スピーカーの対応機能", default_factory=SpeakerSupportedFeatures
+        title="話者の対応機能", default_factory=SpeakerSupportedFeatures
     )
 
 
 class Speaker(CoreSpeaker, EngineSpeaker):
     """
-    スピーカー情報
+    話者情報
     """
 
     pass
diff --git a/voicevox_engine/preset/Preset.py b/voicevox_engine/preset/Preset.py
index 40b03d10c..82dc8daa2 100644
--- a/voicevox_engine/preset/Preset.py
+++ b/voicevox_engine/preset/Preset.py
@@ -8,7 +8,7 @@ class Preset(BaseModel):
 
     id: int = Field(title="プリセットID")
     name: str = Field(title="プリセット名")
-    speaker_uuid: str = Field(title="スピーカーのUUID")
+    speaker_uuid: str = Field(title="話者のUUID")
     style_id: int = Field(title="スタイルID")
     speedScale: float = Field(title="全体の話速")
     pitchScale: float = Field(title="全体の音高")

From 9b96d207d125d6c2de34e2a301ef97c4bd8bd9bc Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Wed, 27 Dec 2023 01:54:31 +0900
Subject: [PATCH 078/177] =?UTF-8?q?=E3=83=A9=E3=82=A4=E3=83=96=E3=83=A9?=
 =?UTF-8?q?=E3=83=AA=E7=AE=A1=E7=90=86=E6=9C=AA=E5=AF=BE=E5=BF=9C=E3=81=A0?=
 =?UTF-8?q?=E3=81=A3=E3=81=9F=E3=82=89API=E3=81=8C=E5=AE=9F=E8=A3=85?=
 =?UTF-8?q?=E3=81=95=E3=82=8C=E3=81=AA=E3=81=84=E3=82=88=E3=81=86=E3=81=AB?=
 =?UTF-8?q?=E5=A4=89=E6=9B=B4=20(#886)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 run.py | 154 +++++++++++++++++++++++++++++----------------------------
 1 file changed, 78 insertions(+), 76 deletions(-)

diff --git a/run.py b/run.py
index d35717d8b..1edbc5301 100644
--- a/run.py
+++ b/run.py
@@ -879,85 +879,87 @@ def speaker_info(
 
         return ret_data
 
-    @app.get(
-        "/downloadable_libraries",
-        response_model=list[DownloadableLibraryInfo],
-        tags=["音声ライブラリ管理"],
-    )
-    def downloadable_libraries() -> list[DownloadableLibraryInfo]:
-        """
-        ダウンロード可能な音声ライブラリの情報を返します。
-
-        Returns
-        -------
-        ret_data: list[DownloadableLibrary]
-        """
-        if not engine_manifest_data.supported_features.manage_library:
-            raise HTTPException(status_code=404, detail="この機能は実装されていません")
-        return library_manager.downloadable_libraries()
-
-    @app.get(
-        "/installed_libraries",
-        response_model=dict[str, InstalledLibraryInfo],
-        tags=["音声ライブラリ管理"],
-    )
-    def installed_libraries() -> dict[str, InstalledLibraryInfo]:
-        """
-        インストールした音声ライブラリの情報を返します。
-
-        Returns
-        -------
-        ret_data: dict[str, InstalledLibrary]
-        """
-        if not engine_manifest_data.supported_features.manage_library:
-            raise HTTPException(status_code=404, detail="この機能は実装されていません")
-        return library_manager.installed_libraries()
+    if engine_manifest_data.supported_features.manage_library:
 
-    @app.post(
-        "/install_library/{library_uuid}",
-        status_code=204,
-        tags=["音声ライブラリ管理"],
-    )
-    async def install_library(
-        library_uuid: str,
-        request: Request,
-    ) -> Response:
-        """
-        音声ライブラリをインストールします。
-        音声ライブラリのZIPファイルをリクエストボディとして送信してください。
-
-        Parameters
-        ----------
-        library_uuid: str
-            音声ライブラリのID
-        """
-        if not engine_manifest_data.supported_features.manage_library:
-            raise HTTPException(status_code=404, detail="この機能は実装されていません")
-        archive = BytesIO(await request.body())
-        loop = asyncio.get_event_loop()
-        await loop.run_in_executor(
-            None, library_manager.install_library, library_uuid, archive
+        @app.get(
+            "/downloadable_libraries",
+            response_model=list[DownloadableLibraryInfo],
+            tags=["音声ライブラリ管理"],
         )
-        return Response(status_code=204)
-
-    @app.post(
-        "/uninstall_library/{library_uuid}",
-        status_code=204,
-        tags=["音声ライブラリ管理"],
-    )
-    def uninstall_library(library_uuid: str) -> Response:
-        """
-        音声ライブラリをアンインストールします。
+        def downloadable_libraries() -> list[DownloadableLibraryInfo]:
+            """
+            ダウンロード可能な音声ライブラリの情報を返します。
+
+            Returns
+            -------
+            ret_data: list[DownloadableLibrary]
+            """
+            if not engine_manifest_data.supported_features.manage_library:
+                raise HTTPException(status_code=404, detail="この機能は実装されていません")
+            return library_manager.downloadable_libraries()
+
+        @app.get(
+            "/installed_libraries",
+            response_model=dict[str, InstalledLibraryInfo],
+            tags=["音声ライブラリ管理"],
+        )
+        def installed_libraries() -> dict[str, InstalledLibraryInfo]:
+            """
+            インストールした音声ライブラリの情報を返します。
+
+            Returns
+            -------
+            ret_data: dict[str, InstalledLibrary]
+            """
+            if not engine_manifest_data.supported_features.manage_library:
+                raise HTTPException(status_code=404, detail="この機能は実装されていません")
+            return library_manager.installed_libraries()
+
+        @app.post(
+            "/install_library/{library_uuid}",
+            status_code=204,
+            tags=["音声ライブラリ管理"],
+        )
+        async def install_library(
+            library_uuid: str,
+            request: Request,
+        ) -> Response:
+            """
+            音声ライブラリをインストールします。
+            音声ライブラリのZIPファイルをリクエストボディとして送信してください。
+
+            Parameters
+            ----------
+            library_uuid: str
+                音声ライブラリのID
+            """
+            if not engine_manifest_data.supported_features.manage_library:
+                raise HTTPException(status_code=404, detail="この機能は実装されていません")
+            archive = BytesIO(await request.body())
+            loop = asyncio.get_event_loop()
+            await loop.run_in_executor(
+                None, library_manager.install_library, library_uuid, archive
+            )
+            return Response(status_code=204)
 
-        Parameters
-        ----------
-        library_uuid: str
-            音声ライブラリのID
-        """
-        if not engine_manifest_data.supported_features.manage_library:
-            raise HTTPException(status_code=404, detail="この機能は実装されていません")
-        library_manager.uninstall_library(library_uuid)
-        return Response(status_code=204)
+        @app.post(
+            "/uninstall_library/{library_uuid}",
+            status_code=204,
+            tags=["音声ライブラリ管理"],
+        )
+        def uninstall_library(library_uuid: str) -> Response:
+            """
+            音声ライブラリをアンインストールします。
+
+            Parameters
+            ----------
+            library_uuid: str
+                音声ライブラリのID
+            """
+            if not engine_manifest_data.supported_features.manage_library:
+                raise HTTPException(status_code=404, detail="この機能は実装されていません")
+            library_manager.uninstall_library(library_uuid)
+            return Response(status_code=204)
 
     @app.post("/initialize_style_id", status_code=204, tags=["その他"])
     def initialize_style_id(

From dcac97dbc37d7e1dc17fba0bbee5f470a6a30d70 Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Wed, 27 Dec 2023 02:36:02 +0900
Subject: [PATCH 079/177] =?UTF-8?q?=E6=9B=B8=E3=81=8D=E8=BE=BC=E3=81=BF?=
 =?UTF-8?q?=E7=B3=BB=E3=81=AEAPI=E3=82=92=E4=B8=80=E6=8B=AC=E3=81=A7?=
 =?UTF-8?q?=E7=84=A1=E5=8A=B9=E5=8C=96=E3=81=99=E3=82=8B=E5=BC=95=E6=95=B0?=
 =?UTF-8?q?=E3=83=BB=E7=92=B0=E5=A2=83=E5=A4=89=E6=95=B0=E3=82=92=E8=BF=BD?=
 =?UTF-8?q?=E5=8A=A0=20(#895)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* 破壊的変更が可能なAPIを無効にできるようにする

* エンジンの静的なデータを変更するAPIを無効化するように

* README変更漏れ

* pysen lint

* 忘れ

* pysen
---
 README.md                              |   4 +
 run.py                                 | 107 +++++++++++++++++++------
 test/e2e/conftest.py                   |  22 ++---
 test/e2e/test_disable_api.py           |  49 +++++++++++
 voicevox_engine/utility/run_utility.py |  23 ++++++
 5 files changed, 170 insertions(+), 35 deletions(-)
 create mode 100644 test/e2e/test_disable_api.py
 create mode 100644 voicevox_engine/utility/run_utility.py

diff --git a/README.md b/README.md
index 2c4da3ff5..0c54b115a 100644
--- a/README.md
+++ b/README.md
@@ -319,6 +319,10 @@ VOICEVOX ではセキュリティ保護のため`localhost`・`127.0.0.1`・`app
 3. 保存ボタンを押して、変更を確定してください。
 4. 設定の適用にはエンジンの再起動が必要です。必要に応じて再起動をしてください。
 
+### データを変更する API を無効化する
+
+実行時引数`--disable_mutable_api`か環境変数`VV_DISABLE_MUTABLE_API=1`を指定することで、エンジンの設定や辞書などを変更する API を無効にできます。
+
 ### 文字コード
 
 リクエスト・レスポンスの文字コードはすべて UTF-8 です。
diff --git a/run.py b/run.py
index 1edbc5301..9380d4365 100644
--- a/run.py
+++ b/run.py
@@ -17,7 +17,7 @@
 
 import soundfile
 import uvicorn
-from fastapi import FastAPI, Form, HTTPException, Query, Request, Response
+from fastapi import Depends, FastAPI, Form, HTTPException, Query, Request, Response
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.openapi.utils import get_openapi
 from fastapi.responses import JSONResponse
@@ -85,6 +85,7 @@
     get_save_dir,
     internal_root,
 )
+from voicevox_engine.utility.run_utility import decide_boolean_from_env
 
 
 def get_style_id_from_deprecated(style_id: int | None, speaker_id: int | None) -> int:
@@ -141,6 +142,7 @@ def generate_app(
     root_dir: Optional[Path] = None,
     cors_policy_mode: CorsPolicyMode = CorsPolicyMode.localapps,
     allow_origin: Optional[List[str]] = None,
+    disable_mutable_api: bool = False,
 ) -> FastAPI:
     if root_dir is None:
         root_dir = engine_root()
@@ -197,6 +199,11 @@ async def block_origin_middleware(request: Request, call_next):
                 status_code=403, content={"detail": "Origin not allowed"}
             )
 
+    # 許可されていないAPIを無効化する
+    def check_disabled_mutable_api():
+        if disable_mutable_api:
+            raise HTTPException(status_code=403, detail="エンジンの静的なデータを変更するAPIは無効化されています")
+
     engine_manifest_data = EngineManifestLoader(
         engine_root() / "engine_manifest.json", engine_root()
     ).load_manifest()
@@ -706,7 +713,12 @@ def get_presets() -> list[Preset]:
             raise HTTPException(status_code=422, detail=str(err))
         return presets
 
-    @app.post("/add_preset", response_model=int, tags=["その他"])
+    @app.post(
+        "/add_preset",
+        response_model=int,
+        tags=["その他"],
+        dependencies=[Depends(check_disabled_mutable_api)],
+    )
     def add_preset(preset: Preset) -> int:
         """
         新しいプリセットを追加します
@@ -728,7 +740,12 @@ def add_preset(preset: Preset) -> int:
             raise HTTPException(status_code=422, detail=str(err))
         return id
 
-    @app.post("/update_preset", response_model=int, tags=["その他"])
+    @app.post(
+        "/update_preset",
+        response_model=int,
+        tags=["その他"],
+        dependencies=[Depends(check_disabled_mutable_api)],
+    )
     def update_preset(preset: Preset) -> int:
         """
         既存のプリセットを更新します
@@ -750,7 +767,12 @@ def update_preset(preset: Preset) -> int:
             raise HTTPException(status_code=422, detail=str(err))
         return id
 
-    @app.post("/delete_preset", status_code=204, tags=["その他"])
+    @app.post(
+        "/delete_preset",
+        status_code=204,
+        tags=["その他"],
+        dependencies=[Depends(check_disabled_mutable_api)],
+    )
     def delete_preset(id: int) -> Response:
         """
         既存のプリセットを削除します
@@ -919,6 +941,7 @@ def installed_libraries() -> dict[str, InstalledLibraryInfo]:
             "/install_library/{library_uuid}",
             status_code=204,
             tags=["音声ライブラリ管理"],
+            dependencies=[Depends(check_disabled_mutable_api)],
         )
         async def install_library(
             library_uuid: str,
@@ -946,6 +969,7 @@ async def install_library(
             "/uninstall_library/{library_uuid}",
             status_code=204,
             tags=["音声ライブラリ管理"],
+            dependencies=[Depends(check_disabled_mutable_api)],
         )
         def uninstall_library(library_uuid: str) -> Response:
             """
@@ -1042,7 +1066,12 @@ def get_user_dict_words() -> dict[str, UserDictWord]:
             traceback.print_exc()
             raise HTTPException(status_code=422, detail="辞書の読み込みに失敗しました。")
 
-    @app.post("/user_dict_word", response_model=str, tags=["ユーザー辞書"])
+    @app.post(
+        "/user_dict_word",
+        response_model=str,
+        tags=["ユーザー辞書"],
+        dependencies=[Depends(check_disabled_mutable_api)],
+    )
     def add_user_dict_word(
         surface: str,
         pronunciation: str,
@@ -1083,7 +1112,12 @@ def add_user_dict_word(
             traceback.print_exc()
             raise HTTPException(status_code=422, detail="ユーザー辞書への追加に失敗しました。")
 
-    @app.put("/user_dict_word/{word_uuid}", status_code=204, tags=["ユーザー辞書"])
+    @app.put(
+        "/user_dict_word/{word_uuid}",
+        status_code=204,
+        tags=["ユーザー辞書"],
+        dependencies=[Depends(check_disabled_mutable_api)],
+    )
     def rewrite_user_dict_word(
         surface: str,
         pronunciation: str,
@@ -1130,7 +1164,12 @@ def rewrite_user_dict_word(
             traceback.print_exc()
             raise HTTPException(status_code=422, detail="ユーザー辞書の更新に失敗しました。")
 
-    @app.delete("/user_dict_word/{word_uuid}", status_code=204, tags=["ユーザー辞書"])
+    @app.delete(
+        "/user_dict_word/{word_uuid}",
+        status_code=204,
+        tags=["ユーザー辞書"],
+        dependencies=[Depends(check_disabled_mutable_api)],
+    )
     def delete_user_dict_word(word_uuid: str) -> Response:
         """
         ユーザー辞書に登録されている言葉を削除します。
@@ -1149,7 +1188,12 @@ def delete_user_dict_word(word_uuid: str) -> Response:
             traceback.print_exc()
             raise HTTPException(status_code=422, detail="ユーザー辞書の更新に失敗しました。")
 
-    @app.post("/import_user_dict", status_code=204, tags=["ユーザー辞書"])
+    @app.post(
+        "/import_user_dict",
+        status_code=204,
+        tags=["ユーザー辞書"],
+        dependencies=[Depends(check_disabled_mutable_api)],
+    )
     def import_user_dict_words(
         import_dict_data: dict[str, UserDictWord],
         override: bool,
@@ -1237,7 +1281,12 @@ def setting_get(request: Request) -> Response:
             },
         )
 
-    @app.post("/setting", response_class=Response, tags=["設定"])
+    @app.post(
+        "/setting",
+        response_class=Response,
+        tags=["設定"],
+        dependencies=[Depends(check_disabled_mutable_api)],
+    )
     def setting_post(
         request: Request,
         cors_policy_mode: str | None = Form(None),  # noqa: B008
@@ -1303,23 +1352,16 @@ def custom_openapi():
 def main() -> None:
     multiprocessing.freeze_support()
 
-    output_log_utf8 = os.getenv("VV_OUTPUT_LOG_UTF8", default="")
-    if output_log_utf8 == "1":
+    output_log_utf8 = decide_boolean_from_env("VV_OUTPUT_LOG_UTF8")
+    if output_log_utf8:
         set_output_log_utf8()
-    elif not (output_log_utf8 == "" or output_log_utf8 == "0"):
-        print(
-            "WARNING:  invalid VV_OUTPUT_LOG_UTF8 environment variable value",
-            file=sys.stderr,
-        )
 
     parser = argparse.ArgumentParser(description="VOICEVOX のエンジンです。")
     parser.add_argument(
         "--host", type=str, default="127.0.0.1", help="接続を受け付けるホストアドレスです。"
     )
     parser.add_argument("--port", type=int, default=50021, help="接続を受け付けるポート番号です。")
-    parser.add_argument(
-        "--use_gpu", action="store_true", help="指定するとGPUを使って音声合成するようになります。"
-    )
+    parser.add_argument("--use_gpu", action="store_true", help="GPUを使って音声合成するようになります。")
     parser.add_argument(
         "--voicevox_dir", type=Path, default=None, help="VOICEVOXのディレクトリパスです。"
     )
@@ -1340,12 +1382,12 @@ def main() -> None:
     parser.add_argument(
         "--enable_mock",
         action="store_true",
-        help="指定するとVOICEVOX COREを使わずモックで音声合成を行います。",
+        help="VOICEVOX COREを使わずモックで音声合成を行います。",
     )
     parser.add_argument(
         "--enable_cancellable_synthesis",
         action="store_true",
-        help="指定すると音声合成を途中でキャンセルできるようになります。",
+        help="音声合成を途中でキャンセルできるようになります。",
     )
     parser.add_argument(
         "--init_processes",
@@ -1354,7 +1396,7 @@ def main() -> None:
         help="cancellable_synthesis機能の初期化時に生成するプロセス数です。",
     )
     parser.add_argument(
-        "--load_all_models", action="store_true", help="指定すると起動時に全ての音声合成モデルを読み込みます。"
+        "--load_all_models", action="store_true", help="起動時に全ての音声合成モデルを読み込みます。"
     )
 
     # 引数へcpu_num_threadsの指定がなければ、環境変数をロールします。
@@ -1365,8 +1407,8 @@ def main() -> None:
         type=int,
         default=os.getenv("VV_CPU_NUM_THREADS") or None,
         help=(
-            "音声合成を行うスレッド数です。指定しないと、代わりに環境変数VV_CPU_NUM_THREADSの値が使われます。"
-            "VV_CPU_NUM_THREADSが空文字列でなく数値でもない場合はエラー終了します。"
+            "音声合成を行うスレッド数です。指定しない場合、代わりに環境変数 VV_CPU_NUM_THREADS の値が使われます。"
+            "VV_CPU_NUM_THREADS が空文字列でなく数値でもない場合はエラー終了します。"
         ),
     )
 
@@ -1374,7 +1416,7 @@ def main() -> None:
         "--output_log_utf8",
         action="store_true",
         help=(
-            "指定するとログ出力をUTF-8でおこないます。指定しないと、代わりに環境変数 VV_OUTPUT_LOG_UTF8 の値が使われます。"
+            "ログ出力をUTF-8でおこないます。指定しない場合、代わりに環境変数 VV_OUTPUT_LOG_UTF8 の値が使われます。"
             "VV_OUTPUT_LOG_UTF8 の値が1の場合はUTF-8で、0または空文字、値がない場合は環境によって自動的に決定されます。"
         ),
     )
@@ -1410,6 +1452,16 @@ def main() -> None:
         ),
     )
 
+    parser.add_argument(
+        "--disable_mutable_api",
+        action="store_true",
+        help=(
+            "辞書登録や設定変更など、エンジンの静的なデータを変更するAPIを無効化します。"
+            "指定しない場合、代わりに環境変数 VV_DISABLE_MUTABLE_API の値が使われます。"
+            "VV_DISABLE_MUTABLE_API の値が1の場合は無効化で、0または空文字、値がない場合は無視されます。"
+        ),
+    )
+
     args = parser.parse_args()
 
     if args.output_log_utf8:
@@ -1488,6 +1540,10 @@ def main() -> None:
         preset_path=preset_path,
     )
 
+    disable_mutable_api: bool = args.disable_mutable_api | decide_boolean_from_env(
+        "VV_DISABLE_MUTABLE_API"
+    )
+
     uvicorn.run(
         generate_app(
             synthesis_engines,
@@ -1499,6 +1555,7 @@ def main() -> None:
             root_dir=root_dir,
             cors_policy_mode=cors_policy_mode,
             allow_origin=allow_origin,
+            disable_mutable_api=disable_mutable_api,
         ),
         host=args.host,
         port=args.port,
diff --git a/test/e2e/conftest.py b/test/e2e/conftest.py
index 3f2b158e8..b635cd998 100644
--- a/test/e2e/conftest.py
+++ b/test/e2e/conftest.py
@@ -26,20 +26,22 @@ def snapshot_json(snapshot: SnapshotAssertion):
 
 
 @pytest.fixture(scope="session")
-def client():
+def app_params():
     synthesis_engines, cores = make_synthesis_engines_and_cores(use_gpu=False)
     latest_core_version = get_latest_core_version(versions=synthesis_engines.keys())
     setting_loader = SettingLoader(Path("./not_exist.yaml"))
     preset_manager = PresetManager(  # FIXME: impl MockPresetManager
         preset_path=Path("./presets.yaml"),
     )
+    return {
+        "synthesis_engines": synthesis_engines,
+        "cores": cores,
+        "latest_core_version": latest_core_version,
+        "setting_loader": setting_loader,
+        "preset_manager": preset_manager,
+    }
 
-    return TestClient(
-        generate_app(
-            synthesis_engines=synthesis_engines,
-            cores=cores,
-            latest_core_version=latest_core_version,
-            setting_loader=setting_loader,
-            preset_manager=preset_manager,
-        )
-    )
+
+@pytest.fixture(scope="session")
+def client(app_params: dict) -> TestClient:
+    return TestClient(generate_app(**app_params))
diff --git a/test/e2e/test_disable_api.py b/test/e2e/test_disable_api.py
new file mode 100644
index 000000000..e60fa5470
--- /dev/null
+++ b/test/e2e/test_disable_api.py
@@ -0,0 +1,49 @@
+"""
+APIを無効化するテスト
+"""
+
+from typing import Literal
+
+from fastapi.testclient import TestClient
+from run import generate_app
+
+
+# Send a request to `path` with the given client and HTTP method, then assert that the response status is 403
+def _assert_request_and_response_403(
+    client: TestClient,
+    method: Literal["post", "get", "put", "delete"],
+    path: str,
+):
+    if method == "post":
+        response = client.post(path)
+    elif method == "get":
+        response = client.get(path)
+    elif method == "put":
+        response = client.put(path)
+    elif method == "delete":
+        response = client.delete(path)
+    else:
+        raise ValueError("methodはpost, get, put, deleteのいずれかである必要があります")
+
+    assert response.status_code == 403, f"{method} {path} が403を返しませんでした"
+
+
+def test_disable_mutable_api(app_params: dict):
+    """APIs that change the engine's static data return 403 when `disable_mutable_api=True`."""
+    client = TestClient(generate_app(**app_params, disable_mutable_api=True))
+
+    # Each mutable API must be disabled
+    _assert_request_and_response_403(client, "post", "/add_preset")
+    _assert_request_and_response_403(client, "post", "/update_preset")
+    _assert_request_and_response_403(client, "post", "/delete_preset")
+    _assert_request_and_response_403(client, "post", "/install_library/dummy")
+    _assert_request_and_response_403(client, "post", "/uninstall_library/dummy")
+    _assert_request_and_response_403(client, "post", "/user_dict_word")
+    _assert_request_and_response_403(client, "put", "/user_dict_word/dummy")
+    _assert_request_and_response_403(client, "delete", "/user_dict_word/dummy")
+    _assert_request_and_response_403(client, "post", "/import_user_dict")
+    _assert_request_and_response_403(client, "post", "/setting")
+
+    # Other APIs remain available
+    response = client.get("/version")
+    assert response.status_code == 200
diff --git a/voicevox_engine/utility/run_utility.py b/voicevox_engine/utility/run_utility.py
new file mode 100644
index 000000000..c61891b9f
--- /dev/null
+++ b/voicevox_engine/utility/run_utility.py
@@ -0,0 +1,23 @@
+import os
+import warnings
+
+
+def decide_boolean_from_env(env_name: str) -> bool:
+    """
+    Return a bool read from the environment variable `env_name`.
+
+    * "1" returns True
+    * "0", an empty string, or an unset variable returns False
+    * any other value emits a warning (attributed to the caller) and returns False
+    """
+    env = os.getenv(env_name, default="")
+    if env == "1":
+        return True
+    elif env == "" or env == "0":
+        return False
+    else:
+        warnings.warn(
+            f"Invalid environment variable value: {env_name}={env}",
+            stacklevel=2,
+        )
+        return False

From 0bd19f90110e5816e7e08c2bee0d080b97dbca01 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 27 Dec 2023 03:22:39 +0900
Subject: [PATCH 080/177] =?UTF-8?q?=E8=BF=BD=E5=8A=A0:=20`TTSEngine`=20?=
 =?UTF-8?q?=E3=81=AB=E8=BF=91=E3=81=84=E3=83=A2=E3=83=83=E3=82=AF=20(#936)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add: `TTSEngine` に aligned なモック

* fix: `MockCoreWrapper` リストアクセス

* fix: 動作テスト削除のリバート
---
 test/test_mock_synthesis_engine.py           | 21 ++++----
 voicevox_engine/dev/core/mock.py             |  2 +-
 voicevox_engine/dev/synthesis_engine/mock.py | 53 ++------------------
 3 files changed, 15 insertions(+), 61 deletions(-)

diff --git a/test/test_mock_synthesis_engine.py b/test/test_mock_synthesis_engine.py
index 820be6cc9..4bdf7d24e 100644
--- a/test/test_mock_synthesis_engine.py
+++ b/test/test_mock_synthesis_engine.py
@@ -106,24 +106,21 @@ def setUp(self):
         self.engine = MockTTSEngine(MockCoreWrapper())
 
     def test_replace_phoneme_length(self):
-        self.assertEqual(
-            self.engine.replace_phoneme_length(
-                accent_phrases=self.accent_phrases_hello_hiho,
-                style_id=0,
-            ),
-            self.accent_phrases_hello_hiho,
+        """`.replace_phoneme_length()` がエラー無く生成をおこなう"""
+        self.engine.replace_phoneme_length(
+            accent_phrases=self.accent_phrases_hello_hiho,
+            style_id=0,
         )
 
     def test_replace_mora_pitch(self):
-        self.assertEqual(
-            self.engine.replace_mora_pitch(
-                accent_phrases=self.accent_phrases_hello_hiho,
-                style_id=0,
-            ),
-            self.accent_phrases_hello_hiho,
+        """`.replace_mora_pitch()` がエラー無く生成をおこなう"""
+        self.engine.replace_mora_pitch(
+            accent_phrases=self.accent_phrases_hello_hiho,
+            style_id=0,
         )
 
     def test_synthesis(self):
+        """`.synthesis()` がエラー無く生成をおこなう"""
         self.engine.synthesis(
             AudioQuery(
                 accent_phrases=self.accent_phrases_hello_hiho,
diff --git a/voicevox_engine/dev/core/mock.py b/voicevox_engine/dev/core/mock.py
index de75ec0b6..52306d9c7 100644
--- a/voicevox_engine/dev/core/mock.py
+++ b/voicevox_engine/dev/core/mock.py
@@ -87,7 +87,7 @@ def yukarin_sa_forward(
         pitch = 100 * numpy.ones((1, length), dtype=numpy.float32)
         pitch[0, 0] = 0.0  # 開始無音 (pau)
         pitch[0, 1] = 200.0  # 分散 0 を避けるため
-        pitch[0, length] = 0.0  # 終了無音 (pau)
+        pitch[0, length - 1] = 0.0  # 終了無音 (pau)
         return pitch
 
     def decode_forward(
diff --git a/voicevox_engine/dev/synthesis_engine/mock.py b/voicevox_engine/dev/synthesis_engine/mock.py
index 4d2adb79d..b6e1a3044 100644
--- a/voicevox_engine/dev/synthesis_engine/mock.py
+++ b/voicevox_engine/dev/synthesis_engine/mock.py
@@ -1,67 +1,24 @@
 import copy
 from logging import getLogger
-from typing import Any, Dict, List
+from typing import Any, Dict
 
 import numpy as np
 from pyopenjtalk import tts
 from soxr import resample
 
-from ...core_adapter import CoreAdapter
 from ...core_wrapper import CoreWrapper
-from ...model import AccentPhrase, AudioQuery
-from ...tts_pipeline import TTSEngineBase
+from ...model import AudioQuery
+from ...tts_pipeline import TTSEngine
 from ...tts_pipeline.tts_engine import to_flatten_moras
 
 
-class MockTTSEngine(TTSEngineBase):
+class MockTTSEngine(TTSEngine):
     """
     TTSEngine [Mock]
     """
 
     def __init__(self, core: CoreWrapper):
-        super().__init__()
-        self.core = CoreAdapter(core)
-        # NOTE: self.coreは将来的に消す予定
-
-    def replace_phoneme_length(
-        self, accent_phrases: List[AccentPhrase], style_id: int
-    ) -> List[AccentPhrase]:
-        """
-        replace_phoneme_length 入力accent_phrasesを変更せずにそのまま返します [Mock]
-
-        Parameters
-        ----------
-        accent_phrases : List[AccentPhrase]
-            フレーズ句のリスト
-        style_id : int
-            スタイルID
-
-        Returns
-        -------
-        List[AccentPhrase]
-            フレーズ句のリスト（変更なし）
-        """
-        return accent_phrases
-
-    def replace_mora_pitch(
-        self, accent_phrases: List[AccentPhrase], style_id: int
-    ) -> List[AccentPhrase]:
-        """
-        replace_mora_pitch 入力accent_phrasesを変更せずにそのまま返します [Mock]
-
-        Parameters
-        ----------
-        accent_phrases : List[AccentPhrase]
-            フレーズ句のリスト
-        style_id : int
-            スタイルID
-
-        Returns
-        -------
-        List[AccentPhrase]
-            フレーズ句のリスト（変更なし）
-        """
-        return accent_phrases
+        super().__init__(core)
 
     def synthesis(
         self,

From 8ed8ba8a286757e6518be021eb6fdfb1df69cbcc Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 27 Dec 2023 12:34:57 +0900
Subject: [PATCH 081/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`get=5Fcores()`?=
 =?UTF-8?q?=20=E3=81=A8=20`synthesis=5Fengines`=20=E3=81=AE=E5=88=86?=
 =?UTF-8?q?=E9=9B=A2=20(#938)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: `cores` と `synthesis_engines` の分離

* refactor: `.core` プライベート化

* Apply suggestions from code review

---------

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 run.py                                     | 10 +++++-----
 voicevox_engine/tts_pipeline/tts_engine.py | 10 +++++-----
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/run.py b/run.py
index 9380d4365..9d02b056c 100644
--- a/run.py
+++ b/run.py
@@ -134,7 +134,7 @@ def set_output_log_utf8() -> None:
 
 def generate_app(
     synthesis_engines: Dict[str, TTSEngineBase],
-    cores: Dict[str, CoreAdapter],  # NOTE: synthesis_engines の機能を一部代替予定
+    cores: Dict[str, CoreAdapter],
     latest_core_version: str,
     setting_loader: SettingLoader,
     preset_manager: PresetManager,
@@ -244,9 +244,9 @@ def get_engine(core_version: Optional[str]) -> TTSEngineBase:
     def get_core(core_version: Optional[str]) -> CoreAdapter:
         """指定したバージョンのコアを取得する"""
         if core_version is None:
-            return synthesis_engines[latest_core_version].core
-        if core_version in synthesis_engines:
-            return synthesis_engines[core_version].core
+            return cores[latest_core_version]
+        if core_version in cores:
+            return cores[core_version]
         raise HTTPException(status_code=422, detail="不明なバージョンです")
 
     @app.post(
@@ -796,7 +796,7 @@ def version() -> str:
     @app.get("/core_versions", response_model=list[str], tags=["その他"])
     def core_versions() -> Response:
         return Response(
-            content=json.dumps(list(synthesis_engines.keys())),
+            content=json.dumps(list(cores.keys())),
             media_type="application/json",
         )
 
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index fc1af32a6..71cc7d195 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -248,8 +248,8 @@ class TTSEngine(TTSEngineBase):
 
     def __init__(self, core: CoreWrapper):
         super().__init__()
-        self.core = CoreAdapter(core)
-        # NOTE: self.coreは将来的に消す予定
+        self._core = CoreAdapter(core)
+        # NOTE: self._coreは将来的に消す予定
 
     def replace_phoneme_length(
         self, accent_phrases: list[AccentPhrase], style_id: int
@@ -266,7 +266,7 @@ def replace_phoneme_length(
         phoneme_ids = numpy.array([p.phoneme_id for p in phonemes], dtype=numpy.int64)
 
         # コアを用いて音素長を生成する
-        phoneme_lengths = self.core.safe_yukarin_s_forward(phoneme_ids, style_id)
+        phoneme_lengths = self._core.safe_yukarin_s_forward(phoneme_ids, style_id)
 
         # 生成結果でモーラ内の音素長属性を置換する
         vowel_indexes = [
@@ -397,7 +397,7 @@ def _create_one_hot(accent_phrase: AccentPhrase, position: int):
         )
 
         # 今までに生成された情報をyukarin_sa_forwardにかけ、推論器によってモーラごとに適切な音高(ピッチ)を割り当てる
-        f0_list = self.core.safe_yukarin_sa_forward(
+        f0_list = self._core.safe_yukarin_sa_forward(
             vowel_phoneme_list,
             consonant_phoneme_list,
             start_accent_list,
@@ -433,6 +433,6 @@ def synthesis(
         )
 
         phoneme, f0 = query_to_decoder_feature(query)
-        raw_wave, sr_raw_wave = self.core.safe_decode_forward(phoneme, f0, style_id)
+        raw_wave, sr_raw_wave = self._core.safe_decode_forward(phoneme, f0, style_id)
         wave = raw_wave_to_output_wave(query, raw_wave, sr_raw_wave)
         return wave

From a5ba8fd735e3233e819c42c0383bcaf76ba0b9b0 Mon Sep 17 00:00:00 2001
From: sabonerune <102559104+sabonerune@users.noreply.github.com>
Date: Thu, 28 Dec 2023 09:34:29 +0900
Subject: [PATCH 082/177] =?UTF-8?q?FIX:=20=E5=9E=8B=E9=96=A2=E4=BF=82?=
 =?UTF-8?q?=E3=81=AE=E3=83=90=E3=82=B0=E3=82=92=E4=BF=AE=E6=AD=A3=20(#940)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 run.py                                          | 6 ++++--
 test/e2e/conftest.py                            | 4 +++-
 voicevox_engine/cancellable_engine.py           | 4 +++-
 voicevox_engine/utility/core_version_utility.py | 4 ++--
 4 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/run.py b/run.py
index 9d02b056c..bc2be504b 100644
--- a/run.py
+++ b/run.py
@@ -521,7 +521,7 @@ def cancellable_synthesis(
         summary="複数まとめて音声合成する",
     )
     def multi_synthesis(
-        queries: list[AccentPhrase],
+        queries: list[AudioQuery],
         style_id: int | None = Query(default=None),  # noqa: B008
         speaker: int | None = Query(default=None, deprecated=True),  # noqa: B008
         core_version: str | None = None,
@@ -1486,7 +1486,9 @@ def main() -> None:
         load_all_models=load_all_models,
     )
     assert len(synthesis_engines) != 0, "音声合成エンジンがありません。"
-    latest_core_version = get_latest_core_version(versions=synthesis_engines.keys())
+    latest_core_version = get_latest_core_version(
+        versions=list(synthesis_engines.keys())
+    )
 
     # Cancellable Engine
     enable_cancellable_synthesis: bool = args.enable_cancellable_synthesis
diff --git a/test/e2e/conftest.py b/test/e2e/conftest.py
index b635cd998..2859c51a9 100644
--- a/test/e2e/conftest.py
+++ b/test/e2e/conftest.py
@@ -28,7 +28,9 @@ def snapshot_json(snapshot: SnapshotAssertion):
 @pytest.fixture(scope="session")
 def app_params():
     synthesis_engines, cores = make_synthesis_engines_and_cores(use_gpu=False)
-    latest_core_version = get_latest_core_version(versions=synthesis_engines.keys())
+    latest_core_version = get_latest_core_version(
+        versions=list(synthesis_engines.keys())
+    )
     setting_loader = SettingLoader(Path("./not_exist.yaml"))
     preset_manager = PresetManager(  # FIXME: impl MockPresetManager
         preset_path=Path("./presets.yaml"),
diff --git a/voicevox_engine/cancellable_engine.py b/voicevox_engine/cancellable_engine.py
index 8de52cb3f..ae93d822c 100644
--- a/voicevox_engine/cancellable_engine.py
+++ b/voicevox_engine/cancellable_engine.py
@@ -228,7 +228,9 @@ def start_synthesis_subprocess(
         enable_mock=enable_mock,
     )
     assert len(synthesis_engines) != 0, "音声合成エンジンがありません。"
-    latest_core_version = get_latest_core_version(versions=synthesis_engines.keys())
+    latest_core_version = get_latest_core_version(
+        versions=list(synthesis_engines.keys())
+    )
     while True:
         try:
             query, style_id, core_version = sub_proc_con.recv()
diff --git a/voicevox_engine/utility/core_version_utility.py b/voicevox_engine/utility/core_version_utility.py
index 25f2d3a3e..9811c25f0 100644
--- a/voicevox_engine/utility/core_version_utility.py
+++ b/voicevox_engine/utility/core_version_utility.py
@@ -1,4 +1,4 @@
-from typing import Iterable
+from typing import Sequence
 
 from semver.version import Version
 
@@ -7,7 +7,7 @@ def parse_core_version(version: str) -> Version:
     return Version.parse(version)
 
 
-def get_latest_core_version(versions: Iterable[str]) -> str:
+def get_latest_core_version(versions: Sequence[str]) -> str:
     if len(versions) == 0:
         raise Exception("versions must be non-empty.")
 

From 6dd20695562f74b098b739060984a6e28eae2ee9 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Fri, 29 Dec 2023 13:35:22 +0900
Subject: [PATCH 083/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E3=82=B3?=
 =?UTF-8?q?=E3=82=A2=E7=94=9F=E6=88=90=E3=81=A8TTSEngine=E7=94=9F=E6=88=90?=
 =?UTF-8?q?=E3=81=AE=E5=88=86=E9=9B=A2=20(#937)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: コア生成とTTSEngine生成の分離

* Update voicevox_engine/tts_pipeline/make_tts_engines.py

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>

* fix: FIXME

* fix: リネーム

---------

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 run.py                                        |  9 ++++--
 test/e2e/conftest.py                          |  5 ++--
 voicevox_engine/cancellable_engine.py         |  6 ++--
 voicevox_engine/tts_pipeline/__init__.py      |  7 +++--
 .../tts_pipeline/make_tts_engines.py          | 28 +++++++++----------
 voicevox_engine/tts_pipeline/tts_engine.py    | 17 +++++++++++
 6 files changed, 48 insertions(+), 24 deletions(-)

diff --git a/run.py b/run.py
index bc2be504b..1f899bca3 100644
--- a/run.py
+++ b/run.py
@@ -66,7 +66,11 @@
     Setting,
     SettingLoader,
 )
-from voicevox_engine.tts_pipeline import TTSEngineBase, make_synthesis_engines_and_cores
+from voicevox_engine.tts_pipeline import (
+    TTSEngineBase,
+    make_cores,
+    make_tts_engines_from_cores,
+)
 from voicevox_engine.tts_pipeline.kana_converter import create_kana, parse_kana
 from voicevox_engine.user_dict import (
     apply_word,
@@ -1476,7 +1480,7 @@ def main() -> None:
     cpu_num_threads: int | None = args.cpu_num_threads
     load_all_models: bool = args.load_all_models
 
-    synthesis_engines, cores = make_synthesis_engines_and_cores(
+    cores = make_cores(
         use_gpu=use_gpu,
         voicelib_dirs=voicelib_dirs,
         voicevox_dir=voicevox_dir,
@@ -1485,6 +1489,7 @@ def main() -> None:
         enable_mock=enable_mock,
         load_all_models=load_all_models,
     )
+    synthesis_engines = make_tts_engines_from_cores(cores)
     assert len(synthesis_engines) != 0, "音声合成エンジンがありません。"
     latest_core_version = get_latest_core_version(
         versions=list(synthesis_engines.keys())
diff --git a/test/e2e/conftest.py b/test/e2e/conftest.py
index 2859c51a9..ddae68de8 100644
--- a/test/e2e/conftest.py
+++ b/test/e2e/conftest.py
@@ -8,7 +8,7 @@
 
 from voicevox_engine.preset import PresetManager
 from voicevox_engine.setting import SettingLoader
-from voicevox_engine.tts_pipeline import make_synthesis_engines_and_cores
+from voicevox_engine.tts_pipeline import make_cores, make_tts_engines_from_cores
 from voicevox_engine.utility.core_version_utility import get_latest_core_version
 
 
@@ -27,7 +27,8 @@ def snapshot_json(snapshot: SnapshotAssertion):
 
 @pytest.fixture(scope="session")
 def app_params():
-    synthesis_engines, cores = make_synthesis_engines_and_cores(use_gpu=False)
+    cores = make_cores(use_gpu=False)
+    synthesis_engines = make_tts_engines_from_cores(cores)
     latest_core_version = get_latest_core_version(
         versions=list(synthesis_engines.keys())
     )
diff --git a/voicevox_engine/cancellable_engine.py b/voicevox_engine/cancellable_engine.py
index ae93d822c..4fc3006e2 100644
--- a/voicevox_engine/cancellable_engine.py
+++ b/voicevox_engine/cancellable_engine.py
@@ -17,7 +17,7 @@
 from fastapi import HTTPException, Request
 
 from .model import AudioQuery
-from .tts_pipeline import make_synthesis_engines_and_cores
+from .tts_pipeline import make_cores, make_tts_engines_from_cores
 from .utility import get_latest_core_version
 
 
@@ -219,7 +219,7 @@ def start_synthesis_subprocess(
         メインプロセスと通信するためのPipe
     """
 
-    synthesis_engines, _ = make_synthesis_engines_and_cores(
+    cores = make_cores(
         use_gpu=use_gpu,
         voicelib_dirs=voicelib_dirs,
         voicevox_dir=voicevox_dir,
@@ -227,6 +227,8 @@ def start_synthesis_subprocess(
         cpu_num_threads=cpu_num_threads,
         enable_mock=enable_mock,
     )
+    synthesis_engines = make_tts_engines_from_cores(cores)
+
     assert len(synthesis_engines) != 0, "音声合成エンジンがありません。"
     latest_core_version = get_latest_core_version(
         versions=list(synthesis_engines.keys())
diff --git a/voicevox_engine/tts_pipeline/__init__.py b/voicevox_engine/tts_pipeline/__init__.py
index 336416982..6a2aea152 100644
--- a/voicevox_engine/tts_pipeline/__init__.py
+++ b/voicevox_engine/tts_pipeline/__init__.py
@@ -1,12 +1,13 @@
 from ..core_wrapper import CoreWrapper, load_runtime_lib
-from .make_tts_engines import make_synthesis_engines_and_cores
-from .tts_engine import TTSEngine
+from .make_tts_engines import make_cores
+from .tts_engine import TTSEngine, make_tts_engines_from_cores
 from .tts_engine_base import TTSEngineBase
 
 __all__ = [
     "CoreWrapper",
     "load_runtime_lib",
-    "make_synthesis_engines_and_cores",
+    "make_cores",
+    "make_tts_engines_from_cores",
     "TTSEngine",
     "TTSEngineBase",
 ]
diff --git a/voicevox_engine/tts_pipeline/make_tts_engines.py b/voicevox_engine/tts_pipeline/make_tts_engines.py
index a282b8f3f..006ef318a 100644
--- a/voicevox_engine/tts_pipeline/make_tts_engines.py
+++ b/voicevox_engine/tts_pipeline/make_tts_engines.py
@@ -5,10 +5,13 @@
 
 from ..core_wrapper import CoreWrapper, load_runtime_lib
 from ..utility import engine_root, get_save_dir
-from .tts_engine import CoreAdapter, TTSEngine, TTSEngineBase
+from .tts_engine import CoreAdapter
 
+MOCK_VER = "0.0.0"
 
-def make_synthesis_engines_and_cores(
+
+# FIXME: ファイル名を変えるか関数の場所を変える
+def make_cores(
     use_gpu: bool,
     voicelib_dirs: Optional[List[Path]] = None,
     voicevox_dir: Optional[Path] = None,
@@ -16,9 +19,9 @@ def make_synthesis_engines_and_cores(
     cpu_num_threads: Optional[int] = None,
     enable_mock: bool = True,
     load_all_models: bool = False,
-) -> tuple[dict[str, TTSEngineBase], dict[str, CoreAdapter]]:
+) -> dict[str, CoreAdapter]:
     """
-    音声ライブラリをロードして、音声合成エンジンを生成
+    音声ライブラリをロードしてコアを生成
 
     Parameters
     ----------
@@ -72,15 +75,14 @@ def make_synthesis_engines_and_cores(
     # ランタイムをロードする
     load_runtime_lib(runtime_dirs)
 
-    # コアをロードし `cores` と `synthesis_engines` へ登録する
+    # コアをロードし `cores` へ登録する
     cores: dict[str, CoreAdapter] = {}
-    synthesis_engines: dict[str, TTSEngineBase] = {}
 
     if not enable_mock:
 
         def load_core_library(core_dir: Path, suppress_error: bool = False):
             """
-            指定されたコアをロードし `synthesis_engines` へ登録する。
+            指定されたコアをロードし `cores` へ登録する。
             Parameters
             ----------
             core_dir : Path
@@ -96,14 +98,13 @@ def load_core_library(core_dir: Path, suppress_error: bool = False):
                 metas = json.loads(core.metas())
                 core_version = metas[0]["version"]
                 print(f"Info: Loading core {core_version}.")
-                if core_version in synthesis_engines:
+                if core_version in cores:
                     print(
                         "Warning: Core loading is skipped because of version duplication.",
                         file=sys.stderr,
                     )
                 else:
                     cores[core_version] = CoreAdapter(core)
-                    synthesis_engines[core_version] = TTSEngine(core)
             except Exception:
                 # コアでなかった場合のエラーを抑制する
                 if not suppress_error:
@@ -130,13 +131,10 @@ def load_core_library(core_dir: Path, suppress_error: bool = False):
     else:
         # モック追加
         from ..dev.core import MockCoreWrapper
-        from ..dev.synthesis_engine import MockTTSEngine
 
-        mock_ver = "0.0.0"
-        if mock_ver not in synthesis_engines:
+        if MOCK_VER not in cores:
             print("Info: Loading mock.")
             core = MockCoreWrapper()
-            cores[mock_ver] = CoreAdapter(core)
-            synthesis_engines[mock_ver] = MockTTSEngine(core)
+            cores[MOCK_VER] = CoreAdapter(core)
 
-    return synthesis_engines, cores
+    return cores
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index 71cc7d195..612d8b43d 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -436,3 +436,20 @@ def synthesis(
         raw_wave, sr_raw_wave = self._core.safe_decode_forward(phoneme, f0, style_id)
         wave = raw_wave_to_output_wave(query, raw_wave, sr_raw_wave)
         return wave
+
+
+def make_tts_engines_from_cores(
+    cores: dict[str, CoreAdapter]
+) -> dict[str, TTSEngineBase]:
+    """コア一覧からTTSエンジン一覧を生成する"""
+    # FIXME: `MOCK_VER` を循環 import 無しに `make_cores()` 関連モジュールから import する
+    MOCK_VER = "0.0.0"
+    tts_engines: dict[str, TTSEngineBase] = {}
+    for ver, core in cores.items():
+        if ver == MOCK_VER:
+            from ..dev.synthesis_engine import MockTTSEngine
+
+            tts_engines[ver] = MockTTSEngine(core.core)
+        else:
+            tts_engines[ver] = TTSEngine(core.core)
+    return tts_engines

From ad97ef27fb7ae74f9e4a6c97fb5dcea5ae5a047d Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Fri, 29 Dec 2023 13:36:52 +0900
Subject: [PATCH 084/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`mora=5Flist`=20?=
 =?UTF-8?q?=E5=9E=8B=20(#941)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: `mora_list` リスト → タプル

* fix: type
---
 voicevox_engine/tts_pipeline/mora_list.py | 331 +++++++++++-----------
 1 file changed, 166 insertions(+), 165 deletions(-)

diff --git a/voicevox_engine/tts_pipeline/mora_list.py b/voicevox_engine/tts_pipeline/mora_list.py
index 5a49f4a3a..b884f6dee 100644
--- a/voicevox_engine/tts_pipeline/mora_list.py
+++ b/voicevox_engine/tts_pipeline/mora_list.py
@@ -41,172 +41,173 @@
 OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 """
-_mora_list_minimum = [
-    ["ヴォ", "v", "o"],
-    ["ヴェ", "v", "e"],
-    ["ヴィ", "v", "i"],
-    ["ヴァ", "v", "a"],
-    ["ヴ", "v", "u"],
-    ["ン", "", "N"],
-    ["ワ", "w", "a"],
-    ["ロ", "r", "o"],
-    ["レ", "r", "e"],
-    ["ル", "r", "u"],
-    ["リョ", "ry", "o"],
-    ["リュ", "ry", "u"],
-    ["リャ", "ry", "a"],
-    ["リェ", "ry", "e"],
-    ["リ", "r", "i"],
-    ["ラ", "r", "a"],
-    ["ヨ", "y", "o"],
-    ["ユ", "y", "u"],
-    ["ヤ", "y", "a"],
-    ["モ", "m", "o"],
-    ["メ", "m", "e"],
-    ["ム", "m", "u"],
-    ["ミョ", "my", "o"],
-    ["ミュ", "my", "u"],
-    ["ミャ", "my", "a"],
-    ["ミェ", "my", "e"],
-    ["ミ", "m", "i"],
-    ["マ", "m", "a"],
-    ["ポ", "p", "o"],
-    ["ボ", "b", "o"],
-    ["ホ", "h", "o"],
-    ["ペ", "p", "e"],
-    ["ベ", "b", "e"],
-    ["ヘ", "h", "e"],
-    ["プ", "p", "u"],
-    ["ブ", "b", "u"],
-    ["フォ", "f", "o"],
-    ["フェ", "f", "e"],
-    ["フィ", "f", "i"],
-    ["ファ", "f", "a"],
-    ["フ", "f", "u"],
-    ["ピョ", "py", "o"],
-    ["ピュ", "py", "u"],
-    ["ピャ", "py", "a"],
-    ["ピェ", "py", "e"],
-    ["ピ", "p", "i"],
-    ["ビョ", "by", "o"],
-    ["ビュ", "by", "u"],
-    ["ビャ", "by", "a"],
-    ["ビェ", "by", "e"],
-    ["ビ", "b", "i"],
-    ["ヒョ", "hy", "o"],
-    ["ヒュ", "hy", "u"],
-    ["ヒャ", "hy", "a"],
-    ["ヒェ", "hy", "e"],
-    ["ヒ", "h", "i"],
-    ["パ", "p", "a"],
-    ["バ", "b", "a"],
-    ["ハ", "h", "a"],
-    ["ノ", "n", "o"],
-    ["ネ", "n", "e"],
-    ["ヌ", "n", "u"],
-    ["ニョ", "ny", "o"],
-    ["ニュ", "ny", "u"],
-    ["ニャ", "ny", "a"],
-    ["ニェ", "ny", "e"],
-    ["ニ", "n", "i"],
-    ["ナ", "n", "a"],
-    ["ドゥ", "d", "u"],
-    ["ド", "d", "o"],
-    ["トゥ", "t", "u"],
-    ["ト", "t", "o"],
-    ["デョ", "dy", "o"],
-    ["デュ", "dy", "u"],
-    ["デャ", "dy", "a"],
-    ["デェ", "dy", "e"],
-    ["ディ", "d", "i"],
-    ["デ", "d", "e"],
-    ["テョ", "ty", "o"],
-    ["テュ", "ty", "u"],
-    ["テャ", "ty", "a"],
-    ["ティ", "t", "i"],
-    ["テ", "t", "e"],
-    ["ツォ", "ts", "o"],
-    ["ツェ", "ts", "e"],
-    ["ツィ", "ts", "i"],
-    ["ツァ", "ts", "a"],
-    ["ツ", "ts", "u"],
-    ["ッ", "", "cl"],
-    ["チョ", "ch", "o"],
-    ["チュ", "ch", "u"],
-    ["チャ", "ch", "a"],
-    ["チェ", "ch", "e"],
-    ["チ", "ch", "i"],
-    ["ダ", "d", "a"],
-    ["タ", "t", "a"],
-    ["ゾ", "z", "o"],
-    ["ソ", "s", "o"],
-    ["ゼ", "z", "e"],
-    ["セ", "s", "e"],
-    ["ズィ", "z", "i"],
-    ["ズ", "z", "u"],
-    ["スィ", "s", "i"],
-    ["ス", "s", "u"],
-    ["ジョ", "j", "o"],
-    ["ジュ", "j", "u"],
-    ["ジャ", "j", "a"],
-    ["ジェ", "j", "e"],
-    ["ジ", "j", "i"],
-    ["ショ", "sh", "o"],
-    ["シュ", "sh", "u"],
-    ["シャ", "sh", "a"],
-    ["シェ", "sh", "e"],
-    ["シ", "sh", "i"],
-    ["ザ", "z", "a"],
-    ["サ", "s", "a"],
-    ["ゴ", "g", "o"],
-    ["コ", "k", "o"],
-    ["ゲ", "g", "e"],
-    ["ケ", "k", "e"],
-    ["グヮ", "gw", "a"],
-    ["グ", "g", "u"],
-    ["クヮ", "kw", "a"],
-    ["ク", "k", "u"],
-    ["ギョ", "gy", "o"],
-    ["ギュ", "gy", "u"],
-    ["ギャ", "gy", "a"],
-    ["ギェ", "gy", "e"],
-    ["ギ", "g", "i"],
-    ["キョ", "ky", "o"],
-    ["キュ", "ky", "u"],
-    ["キャ", "ky", "a"],
-    ["キェ", "ky", "e"],
-    ["キ", "k", "i"],
-    ["ガ", "g", "a"],
-    ["カ", "k", "a"],
-    ["オ", "", "o"],
-    ["エ", "", "e"],
-    ["ウォ", "w", "o"],
-    ["ウェ", "w", "e"],
-    ["ウィ", "w", "i"],
-    ["ウ", "", "u"],
-    ["イェ", "y", "e"],
-    ["イ", "", "i"],
-    ["ア", "", "a"],
+
+_mora_list_minimum: list[tuple[str, str, str]] = [
+    ("ヴォ", "v", "o"),
+    ("ヴェ", "v", "e"),
+    ("ヴィ", "v", "i"),
+    ("ヴァ", "v", "a"),
+    ("ヴ", "v", "u"),
+    ("ン", "", "N"),
+    ("ワ", "w", "a"),
+    ("ロ", "r", "o"),
+    ("レ", "r", "e"),
+    ("ル", "r", "u"),
+    ("リョ", "ry", "o"),
+    ("リュ", "ry", "u"),
+    ("リャ", "ry", "a"),
+    ("リェ", "ry", "e"),
+    ("リ", "r", "i"),
+    ("ラ", "r", "a"),
+    ("ヨ", "y", "o"),
+    ("ユ", "y", "u"),
+    ("ヤ", "y", "a"),
+    ("モ", "m", "o"),
+    ("メ", "m", "e"),
+    ("ム", "m", "u"),
+    ("ミョ", "my", "o"),
+    ("ミュ", "my", "u"),
+    ("ミャ", "my", "a"),
+    ("ミェ", "my", "e"),
+    ("ミ", "m", "i"),
+    ("マ", "m", "a"),
+    ("ポ", "p", "o"),
+    ("ボ", "b", "o"),
+    ("ホ", "h", "o"),
+    ("ペ", "p", "e"),
+    ("ベ", "b", "e"),
+    ("ヘ", "h", "e"),
+    ("プ", "p", "u"),
+    ("ブ", "b", "u"),
+    ("フォ", "f", "o"),
+    ("フェ", "f", "e"),
+    ("フィ", "f", "i"),
+    ("ファ", "f", "a"),
+    ("フ", "f", "u"),
+    ("ピョ", "py", "o"),
+    ("ピュ", "py", "u"),
+    ("ピャ", "py", "a"),
+    ("ピェ", "py", "e"),
+    ("ピ", "p", "i"),
+    ("ビョ", "by", "o"),
+    ("ビュ", "by", "u"),
+    ("ビャ", "by", "a"),
+    ("ビェ", "by", "e"),
+    ("ビ", "b", "i"),
+    ("ヒョ", "hy", "o"),
+    ("ヒュ", "hy", "u"),
+    ("ヒャ", "hy", "a"),
+    ("ヒェ", "hy", "e"),
+    ("ヒ", "h", "i"),
+    ("パ", "p", "a"),
+    ("バ", "b", "a"),
+    ("ハ", "h", "a"),
+    ("ノ", "n", "o"),
+    ("ネ", "n", "e"),
+    ("ヌ", "n", "u"),
+    ("ニョ", "ny", "o"),
+    ("ニュ", "ny", "u"),
+    ("ニャ", "ny", "a"),
+    ("ニェ", "ny", "e"),
+    ("ニ", "n", "i"),
+    ("ナ", "n", "a"),
+    ("ドゥ", "d", "u"),
+    ("ド", "d", "o"),
+    ("トゥ", "t", "u"),
+    ("ト", "t", "o"),
+    ("デョ", "dy", "o"),
+    ("デュ", "dy", "u"),
+    ("デャ", "dy", "a"),
+    ("デェ", "dy", "e"),
+    ("ディ", "d", "i"),
+    ("デ", "d", "e"),
+    ("テョ", "ty", "o"),
+    ("テュ", "ty", "u"),
+    ("テャ", "ty", "a"),
+    ("ティ", "t", "i"),
+    ("テ", "t", "e"),
+    ("ツォ", "ts", "o"),
+    ("ツェ", "ts", "e"),
+    ("ツィ", "ts", "i"),
+    ("ツァ", "ts", "a"),
+    ("ツ", "ts", "u"),
+    ("ッ", "", "cl"),
+    ("チョ", "ch", "o"),
+    ("チュ", "ch", "u"),
+    ("チャ", "ch", "a"),
+    ("チェ", "ch", "e"),
+    ("チ", "ch", "i"),
+    ("ダ", "d", "a"),
+    ("タ", "t", "a"),
+    ("ゾ", "z", "o"),
+    ("ソ", "s", "o"),
+    ("ゼ", "z", "e"),
+    ("セ", "s", "e"),
+    ("ズィ", "z", "i"),
+    ("ズ", "z", "u"),
+    ("スィ", "s", "i"),
+    ("ス", "s", "u"),
+    ("ジョ", "j", "o"),
+    ("ジュ", "j", "u"),
+    ("ジャ", "j", "a"),
+    ("ジェ", "j", "e"),
+    ("ジ", "j", "i"),
+    ("ショ", "sh", "o"),
+    ("シュ", "sh", "u"),
+    ("シャ", "sh", "a"),
+    ("シェ", "sh", "e"),
+    ("シ", "sh", "i"),
+    ("ザ", "z", "a"),
+    ("サ", "s", "a"),
+    ("ゴ", "g", "o"),
+    ("コ", "k", "o"),
+    ("ゲ", "g", "e"),
+    ("ケ", "k", "e"),
+    ("グヮ", "gw", "a"),
+    ("グ", "g", "u"),
+    ("クヮ", "kw", "a"),
+    ("ク", "k", "u"),
+    ("ギョ", "gy", "o"),
+    ("ギュ", "gy", "u"),
+    ("ギャ", "gy", "a"),
+    ("ギェ", "gy", "e"),
+    ("ギ", "g", "i"),
+    ("キョ", "ky", "o"),
+    ("キュ", "ky", "u"),
+    ("キャ", "ky", "a"),
+    ("キェ", "ky", "e"),
+    ("キ", "k", "i"),
+    ("ガ", "g", "a"),
+    ("カ", "k", "a"),
+    ("オ", "", "o"),
+    ("エ", "", "e"),
+    ("ウォ", "w", "o"),
+    ("ウェ", "w", "e"),
+    ("ウィ", "w", "i"),
+    ("ウ", "", "u"),
+    ("イェ", "y", "e"),
+    ("イ", "", "i"),
+    ("ア", "", "a"),
 ]
-_mora_list_additional = [
-    ["ヴョ", "by", "o"],
-    ["ヴュ", "by", "u"],
-    ["ヴャ", "by", "a"],
-    ["ヲ", "", "o"],
-    ["ヱ", "", "e"],
-    ["ヰ", "", "i"],
-    ["ヮ", "w", "a"],
-    ["ョ", "y", "o"],
-    ["ュ", "y", "u"],
-    ["ヅ", "z", "u"],
-    ["ヂ", "j", "i"],
-    ["ヶ", "k", "e"],
-    ["ャ", "y", "a"],
-    ["ォ", "", "o"],
-    ["ェ", "", "e"],
-    ["ゥ", "", "u"],
-    ["ィ", "", "i"],
-    ["ァ", "", "a"],
+_mora_list_additional: list[tuple[str, str, str]] = [
+    ("ヴョ", "by", "o"),
+    ("ヴュ", "by", "u"),
+    ("ヴャ", "by", "a"),
+    ("ヲ", "", "o"),
+    ("ヱ", "", "e"),
+    ("ヰ", "", "i"),
+    ("ヮ", "w", "a"),
+    ("ョ", "y", "o"),
+    ("ュ", "y", "u"),
+    ("ヅ", "z", "u"),
+    ("ヂ", "j", "i"),
+    ("ヶ", "k", "e"),
+    ("ャ", "y", "a"),
+    ("ォ", "", "o"),
+    ("ェ", "", "e"),
+    ("ゥ", "", "u"),
+    ("ィ", "", "i"),
+    ("ァ", "", "a"),
 ]
 
 openjtalk_mora2text = {

From fc3ccc64ef95e059cb2bc652d9cde086116564a3 Mon Sep 17 00:00:00 2001
From: sabonerune <102559104+sabonerune@users.noreply.github.com>
Date: Fri, 29 Dec 2023 21:09:34 +0900
Subject: [PATCH 085/177] =?UTF-8?q?ENH:=20mypy=E3=82=92=E6=9C=89=E5=8A=B9?=
 =?UTF-8?q?=E5=8C=96=20(#943)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* ENH: mypyを有効化

* ADD: types-PyYAMLを追加

* FIX: stdout/errの型チェックとエラー処理を追加

* ADD: TODOコメント追加

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>

* FIX: コメント追記

---------

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 poetry.lock                                   | 13 +++++-
 pyproject.toml                                | 14 +++---
 requirements-test.txt                         |  1 +
 run.py                                        | 44 ++++++++++++-------
 setup.cfg                                     |  3 +-
 voicevox_engine/core_adapter.py               |  2 +-
 voicevox_engine/core_wrapper.py               | 10 ++---
 voicevox_engine/dev/core/mock.py              |  2 +-
 voicevox_engine/library_manager.py            | 10 ++---
 voicevox_engine/metas/MetasStore.py           | 15 +++++--
 voicevox_engine/model.py                      | 10 ++---
 voicevox_engine/morphing.py                   |  4 +-
 voicevox_engine/preset/PresetManager.py       | 13 +++---
 voicevox_engine/setting/SettingLoader.py      |  7 ++-
 .../acoustic_feature_extractor.py             |  2 +-
 .../tts_pipeline/make_tts_engines.py          |  2 +-
 voicevox_engine/tts_pipeline/text_analyzer.py |  4 +-
 voicevox_engine/tts_pipeline/tts_engine.py    | 29 +++++++-----
 voicevox_engine/user_dict.py                  | 13 +++---
 .../utility/core_version_utility.py           |  4 +-
 voicevox_engine/utility/mutex_utility.py      |  6 ++-
 21 files changed, 126 insertions(+), 82 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index a62873ac8..c231a3ea8 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2240,6 +2240,17 @@ files = [
     {file = "trove_classifiers-2023.8.7-py3-none-any.whl", hash = "sha256:a676626a31286130d56de2ea1232484df97c567eb429d56cfcb0637e681ecf09"},
 ]
 
+[[package]]
+name = "types-pyyaml"
+version = "6.0.12.12"
+description = "Typing stubs for PyYAML"
+optional = false
+python-versions = "*"
+files = [
+    {file = "types-PyYAML-6.0.12.12.tar.gz", hash = "sha256:334373d392fde0fdf95af5c3f1661885fa10c52167b14593eb856289e1855062"},
+    {file = "types_PyYAML-6.0.12.12-py3-none-any.whl", hash = "sha256:c05bc6c158facb0676674b7f11fe3960db4f389718e19e62bd2b84d6205cfd24"},
+]
+
 [[package]]
 name = "typing-extensions"
 version = "4.7.1"
@@ -2431,4 +2442,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = "~3.11"
-content-hash = "4711e0905b713acdd1e99867b48670e76e6840da7c7b853d3a5248de2d1f68b2"
+content-hash = "cf49f43d1965899202ef766f3912901c6f8309ec29dc83f4902f568109f5cba4"
diff --git a/pyproject.toml b/pyproject.toml
index dbf9c1d30..3cf991f3f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,20 +5,23 @@ version = "0.10.5"
 enable_black = true
 enable_flake8 = true
 enable_isort = true
-enable_mypy = false    # TODO: eliminate errors and enable at CI
-mypy_preset = "entry"  # TODO: "strict"
+enable_mypy = true
+mypy_preset = "entry" # TODO: "strict"
+mypy_plugins = [
+  { function = "numpy.typing.mypy_plugin" },
+  { function = "pydantic.mypy" },
+]
 line_length = 88
 py_version = "py311"
 isort_known_first_party = ["voicevox_engine"]
 isort_known_third_party = ["numpy"]
 [[tool.pysen.lint.mypy_targets]]
-  paths = [".", "voicevox_engine/"]
+paths = ["run.py", "voicevox_engine/"] # TODO: paths = ["."]
 
 [tool.black] # automatically generated by pysen
 # pysen ignores and overwrites any modifications
 line-length = 88
-target-version = ["py310", "py311"]
-
+target-version = ["py311"]
 
 [tool.isort] # automatically generated by pysen
 # pysen ignores and overwrites any modifications
@@ -76,6 +79,7 @@ coveralls = "^3.2.0"
 poetry = "^1.3.1"
 httpx = "^0.25.0"          # NOTE: required by fastapi.testclient.TestClient
 syrupy = "^4.6.0"
+types-pyyaml = "^6.0"
 
 [tool.poetry.group.license.dependencies]
 pip-licenses = "^4.2.0"
diff --git a/requirements-test.txt b/requirements-test.txt
index ebc32d97c..771c5769c 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -86,6 +86,7 @@ syrupy==4.6.0 ; python_version >= "3.11" and python_version < "3.12"
 tomlkit==0.12.1 ; python_version >= "3.11" and python_version < "3.12"
 tqdm==4.66.1 ; python_version >= "3.11" and python_version < "3.12"
 trove-classifiers==2023.8.7 ; python_version >= "3.11" and python_version < "3.12"
+types-pyyaml==6.0.12.12 ; python_version >= "3.11" and python_version < "3.12"
 typing-extensions==4.7.1 ; python_version >= "3.11" and python_version < "3.12"
 unidiff==0.7.5 ; python_version >= "3.11" and python_version < "3.12"
 urllib3==2.0.4 ; python_version >= "3.11" and python_version < "3.12"
diff --git a/run.py b/run.py
index 1f899bca3..1d94100a4 100644
--- a/run.py
+++ b/run.py
@@ -9,11 +9,12 @@
 import traceback
 import warnings
 import zipfile
+from collections.abc import Awaitable, Callable
 from functools import lru_cache
 from io import BytesIO, TextIOWrapper
 from pathlib import Path
 from tempfile import NamedTemporaryFile, TemporaryFile
-from typing import Any, Dict, List, Optional
+from typing import Annotated, Any, Dict, List, Optional
 
 import soundfile
 import uvicorn
@@ -22,7 +23,7 @@
 from fastapi.openapi.utils import get_openapi
 from fastapi.responses import JSONResponse
 from fastapi.templating import Jinja2Templates
-from pydantic import ValidationError, conint
+from pydantic import ValidationError
 from starlette.background import BackgroundTask
 from starlette.responses import FileResponse
 
@@ -117,23 +118,30 @@ def set_output_log_utf8() -> None:
     """
     # コンソールがない環境だとNone https://docs.python.org/ja/3/library/sys.html#sys.__stdin__
     if sys.stdout is not None:
-        # 必ずしもreconfigure()が実装されているとは限らない
-        try:
+        if isinstance(sys.stdout, TextIOWrapper):
             sys.stdout.reconfigure(encoding="utf-8")
-        except AttributeError:
+        else:
             # バッファを全て出力する
             sys.stdout.flush()
-            sys.stdout = TextIOWrapper(
-                sys.stdout.buffer, encoding="utf-8", errors="backslashreplace"
-            )
+            try:
+                sys.stdout = TextIOWrapper(
+                    sys.stdout.buffer, encoding="utf-8", errors="backslashreplace"
+                )
+            except AttributeError:
+                # stdout.bufferがない場合は無視
+                pass
     if sys.stderr is not None:
-        try:
+        if isinstance(sys.stderr, TextIOWrapper):
             sys.stderr.reconfigure(encoding="utf-8")
-        except AttributeError:
+        else:
             sys.stderr.flush()
-            sys.stderr = TextIOWrapper(
-                sys.stderr.buffer, encoding="utf-8", errors="backslashreplace"
-            )
+            try:
+                sys.stderr = TextIOWrapper(
+                    sys.stderr.buffer, encoding="utf-8", errors="backslashreplace"
+                )
+            except AttributeError:
+                # stderr.bufferがない場合は無視
+                pass
 
 
 def generate_app(
@@ -183,7 +191,9 @@ def generate_app(
 
     # 許可されていないOriginを遮断するミドルウェア
     @app.middleware("http")
-    async def block_origin_middleware(request: Request, call_next):
+    async def block_origin_middleware(
+        request: Request, call_next: Callable[[Request], Awaitable[Response]]
+    ) -> Response | JSONResponse:
         isValidOrigin: bool = False
         if "Origin" not in request.headers:  # Originのない純粋なリクエストの場合
             isValidOrigin = True
@@ -1081,7 +1091,7 @@ def add_user_dict_word(
         pronunciation: str,
         accent_type: int,
         word_type: WordTypes | None = None,
-        priority: conint(ge=MIN_PRIORITY, le=MAX_PRIORITY) | None = None,
+        priority: Annotated[int | None, Query(ge=MIN_PRIORITY, le=MAX_PRIORITY)] = None,
     ) -> Response:
         """
         ユーザー辞書に言葉を追加します。
@@ -1128,7 +1138,7 @@ def rewrite_user_dict_word(
         accent_type: int,
         word_uuid: str,
         word_type: WordTypes | None = None,
-        priority: conint(ge=MIN_PRIORITY, le=MAX_PRIORITY) | None = None,
+        priority: Annotated[int | None, Query(ge=MIN_PRIORITY, le=MAX_PRIORITY)] = None,
     ) -> Response:
         """
         ユーザー辞書に登録されている言葉を更新します。
@@ -1348,7 +1358,7 @@ def custom_openapi():
         app.openapi_schema = openapi_schema
         return openapi_schema
 
-    app.openapi = custom_openapi
+    app.openapi = custom_openapi  # type: ignore[method-assign]
 
     return app
 
diff --git a/setup.cfg b/setup.cfg
index 2a1a913e0..a66e1fd12 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -24,7 +24,8 @@ disallow_untyped_defs = False
 ignore_errors = False
 ignore_missing_imports = True
 no_implicit_optional = True
-python_version = 3.10
+plugins = numpy.typing.mypy_plugin,pydantic.mypy
+python_version = 3.11
 show_error_codes = True
 strict_equality = True
 strict_optional = True
diff --git a/voicevox_engine/core_adapter.py b/voicevox_engine/core_adapter.py
index e3ce43e24..3af7c4a67 100644
--- a/voicevox_engine/core_adapter.py
+++ b/voicevox_engine/core_adapter.py
@@ -35,7 +35,7 @@ def supported_devices(self) -> str | None:
             supported_devices = None
         return supported_devices
 
-    def initialize_style_id_synthesis(self, style_id: int, skip_reinit: bool):
+    def initialize_style_id_synthesis(self, style_id: int, skip_reinit: bool) -> None:
         """
         指定したスタイルでの音声合成を初期化する。
         何度も実行可能。未実装の場合は何もしない。
diff --git a/voicevox_engine/core_wrapper.py b/voicevox_engine/core_wrapper.py
index 07d37ea4a..6c74e6bb6 100644
--- a/voicevox_engine/core_wrapper.py
+++ b/voicevox_engine/core_wrapper.py
@@ -5,7 +5,7 @@
 from dataclasses import dataclass
 from enum import Enum, auto
 from pathlib import Path
-from typing import List, Literal
+from typing import Literal
 
 import numpy as np
 
@@ -18,7 +18,7 @@ class CoreError(Exception):
     """コア呼び出しで発生したエラー"""
 
 
-def load_runtime_lib(runtime_dirs: List[Path]):
+def load_runtime_lib(runtime_dirs: list[Path]) -> None:
     if platform.system() == "Windows":
         # DirectML.dllはonnxruntimeと互換性のないWindows標準搭載のものを優先して読み込むことがあるため、明示的に読み込む
         # 参考 1. https://github.com/microsoft/onnxruntime/issues/3360
@@ -246,13 +246,13 @@ def _get_arch_name() -> Literal["x64", "x86", "aarch64", "armv7l"] | None:
     elif machine == "arm64":
         return "aarch64"
     elif machine in ["armv7l", "aarch64"]:
-        return machine
+        return machine  # type: ignore[return-value]
     else:
         return None
 
 
 def _get_core_name(
-    arch_name: Literal["x64", "x86", "aarch64", "armv7l"],
+    arch_name: Literal["x64", "x86", "aarch64", "armv7l"] | str,
     platform_name: str,
     model_type: Literal["libtorch", "onnxruntime"],
     gpu_type: GPUType,
@@ -463,7 +463,6 @@ def __init__(
         cpu_num_threads: int = 0,
         load_all_models: bool = False,
     ) -> None:
-
         self.default_sampling_rate = 24000
 
         self.core = load_core(core_dir, use_gpu)
@@ -485,6 +484,7 @@ def __init__(
         is_version_0_12_core_or_later = (
             _find_version_0_12_core_or_later(core_dir) is not None
         )
+        model_type: Literal["libtorch", "onnxruntime"] | None
         if is_version_0_12_core_or_later:
             model_type = "onnxruntime"
             self.exist_load_model = True
diff --git a/voicevox_engine/dev/core/mock.py b/voicevox_engine/dev/core/mock.py
index 52306d9c7..07c9202f0 100644
--- a/voicevox_engine/dev/core/mock.py
+++ b/voicevox_engine/dev/core/mock.py
@@ -100,7 +100,7 @@ def decode_forward(
     ) -> ndarray:
         """フレーム長・音素種類数・フレーム音高・フレーム音素onehot・スタイルIDから音声波形を生成する"""
         # Mock: 定数の音声波形を生成。[0.1, 0.1, ..., 0.1, 0.1]
-        return 0.1 * numpy.one((length * 256,), dtype=numpy.float32)
+        return 0.1 * numpy.ones((length * 256,), dtype=numpy.float32)
 
     def supported_devices(self):
         return json.dumps(
diff --git a/voicevox_engine/library_manager.py b/voicevox_engine/library_manager.py
index 3bab1508b..c7f6c6502 100644
--- a/voicevox_engine/library_manager.py
+++ b/voicevox_engine/library_manager.py
@@ -104,9 +104,9 @@ def installed_libraries(self) -> Dict[str, InstalledLibraryInfo]:
                 # ライブラリ情報の取得 from `library_root_dir / f"{library_uuid}" / "metas.json"`
                 library_uuid = os.path.basename(library_dir)
                 with open(library_dir / INFO_FILE, encoding="utf-8") as f:
-                    library[library_uuid] = json.load(f)
-                    # アンインストール出来ないライブラリを作る場合、何かしらの条件でFalseを設定する
-                    library[library_uuid]["uninstallable"] = True
+                    info = json.load(f)
+                # アンインストール出来ないライブラリを作る場合、何かしらの条件でFalseを設定する
+                library[library_uuid] = InstalledLibraryInfo(**info, uninstallable=True)
         return library
 
     def install_library(self, library_id: str, file: BytesIO) -> Path:
@@ -211,7 +211,7 @@ def install_library(self, library_id: str, file: BytesIO) -> Path:
 
         return library_dir
 
-    def uninstall_library(self, library_id: str):
+    def uninstall_library(self, library_id: str) -> None:
         """
         インストール済み音声ライブラリのアンインストール
         Parameters
@@ -227,7 +227,7 @@ def uninstall_library(self, library_id: str):
             )
 
         # アンインストール許可フラグのバリデーション
-        if not installed_libraries[library_id]["uninstallable"]:
+        if not installed_libraries[library_id].uninstallable:
             raise HTTPException(
                 status_code=403, detail=f"指定された音声ライブラリ {library_id} はアンインストールできません。"
             )
diff --git a/voicevox_engine/metas/MetasStore.py b/voicevox_engine/metas/MetasStore.py
index efed15353..3bdc365c4 100644
--- a/voicevox_engine/metas/MetasStore.py
+++ b/voicevox_engine/metas/MetasStore.py
@@ -2,7 +2,12 @@
 from pathlib import Path
 from typing import TYPE_CHECKING, Dict, List, Tuple
 
-from voicevox_engine.metas.Metas import CoreSpeaker, EngineSpeaker, Speaker, StyleInfo
+from voicevox_engine.metas.Metas import (
+    CoreSpeaker,
+    EngineSpeaker,
+    Speaker,
+    SpeakerStyle,
+)
 
 if TYPE_CHECKING:
     from voicevox_engine.core_adapter import CoreAdapter
@@ -54,7 +59,9 @@ def load_combined_metas(self, core: "CoreAdapter") -> List[Speaker]:
         ]
 
 
-def construct_lookup(speakers: List[Speaker]) -> Dict[int, Tuple[Speaker, StyleInfo]]:
+def construct_lookup(
+    speakers: List[Speaker],
+) -> Dict[int, Tuple[Speaker, SpeakerStyle]]:
     """
     スタイルID に話者メタ情報・スタイルメタ情報を紐付ける対応表を生成
     Parameters
@@ -63,10 +70,10 @@ def construct_lookup(speakers: List[Speaker]) -> Dict[int, Tuple[Speaker, StyleI
         話者メタ情報
     Returns
     -------
-    ret : Dict[int, Tuple[Speaker, StyleInfo]]
+    ret : Dict[int, Tuple[Speaker, SpeakerStyle]]
         スタイルID に話者メタ情報・スタイルメタ情報が紐付いた対応表
     """
-    lookup_table = dict()
+    lookup_table: dict[int, tuple[Speaker, SpeakerStyle]] = dict()
     for speaker in speakers:
         for style in speaker.styles:
             lookup_table[style.id] = (speaker, style)
diff --git a/voicevox_engine/model.py b/voicevox_engine/model.py
index d2d7e30f7..d5477a131 100644
--- a/voicevox_engine/model.py
+++ b/voicevox_engine/model.py
@@ -1,8 +1,8 @@
 from enum import Enum
 from re import findall, fullmatch
-from typing import Dict, List, Optional
+from typing import Any, Dict, List, Optional
 
-from pydantic import BaseModel, Field, StrictStr, conint, validator
+from pydantic import BaseModel, Field, StrictStr, validator
 
 from .metas.Metas import Speaker, SpeakerInfo
 
@@ -80,7 +80,7 @@ class ParseKanaErrorCode(Enum):
 
 
 class ParseKanaError(Exception):
-    def __init__(self, errcode: ParseKanaErrorCode, **kwargs):
+    def __init__(self, errcode: ParseKanaErrorCode, **kwargs: Any) -> None:
         self.errcode = errcode
         self.errname = errcode.name
         self.kwargs: Dict[str, str] = kwargs
@@ -167,8 +167,8 @@ class UserDictWord(BaseModel):
     """
 
     surface: str = Field(title="表層形")
-    priority: conint(ge=USER_DICT_MIN_PRIORITY, le=USER_DICT_MAX_PRIORITY) = Field(
-        title="優先度"
+    priority: int = Field(
+        title="優先度", ge=USER_DICT_MIN_PRIORITY, le=USER_DICT_MAX_PRIORITY
     )
     context_id: int = Field(title="文脈ID", default=1348)
     part_of_speech: str = Field(title="品詞")
diff --git a/voicevox_engine/morphing.py b/voicevox_engine/morphing.py
index 6f456df0d..31be8cfa9 100644
--- a/voicevox_engine/morphing.py
+++ b/voicevox_engine/morphing.py
@@ -8,7 +8,7 @@
 from soxr import resample
 
 from .core_adapter import CoreAdapter
-from .metas.Metas import Speaker, SpeakerSupportPermittedSynthesisMorphing, StyleInfo
+from .metas.Metas import Speaker, SpeakerStyle, SpeakerSupportPermittedSynthesisMorphing
 from .metas.MetasStore import construct_lookup
 from .model import AudioQuery, MorphableTargetInfo, StyleIdNotFoundError
 from .tts_pipeline import TTSEngine
@@ -76,7 +76,7 @@ def get_morphable_targets(
 
 
 def is_synthesis_morphing_permitted(
-    speaker_lookup: Dict[int, Tuple[Speaker, StyleInfo]],
+    speaker_lookup: Dict[int, Tuple[Speaker, SpeakerStyle]],
     base_speaker: int,
     target_speaker: int,
 ) -> bool:
diff --git a/voicevox_engine/preset/PresetManager.py b/voicevox_engine/preset/PresetManager.py
index d06ec2b03..cfb8dc161 100644
--- a/voicevox_engine/preset/PresetManager.py
+++ b/voicevox_engine/preset/PresetManager.py
@@ -24,7 +24,7 @@ def __init__(self, preset_path: Path):
             プリセット情報を一元管理するYAMLファイルへのパス
         """
         self.presets: list[Preset] = []
-        self.last_modified_time = 0
+        self.last_modified_time = 0.0
         self.preset_path = preset_path
 
     def load_presets(self) -> list[Preset]:
@@ -66,7 +66,7 @@ def load_presets(self) -> list[Preset]:
 
         return self.presets
 
-    def add_preset(self, preset: Preset):
+    def add_preset(self, preset: Preset) -> int:
         """
         新規プリセットの追加
         Parameters
@@ -100,7 +100,7 @@ def add_preset(self, preset: Preset):
 
         return preset.id
 
-    def update_preset(self, preset: Preset):
+    def update_preset(self, preset: Preset) -> int:
         """
         既存プリセットの更新
         Parameters
@@ -117,7 +117,7 @@ def update_preset(self, preset: Preset):
         self.load_presets()
 
         # 対象プリセットの検索
-        prev_preset = (-1, None)
+        prev_preset: tuple[int, Preset | None] = (-1, None)
         for i in range(len(self.presets)):
             if self.presets[i].id == preset.id:
                 prev_preset = (i, self.presets[i])
@@ -130,8 +130,7 @@ def update_preset(self, preset: Preset):
         try:
             self._write_on_file()
         except Exception as err:
-            if prev_preset != (-1, None):
-                self.presets[prev_preset[0]] = prev_preset[1]
+            self.presets[prev_preset[0]] = prev_preset[1]
             if isinstance(err, FileNotFoundError):
                 raise PresetError("プリセットの設定ファイルに書き込み失敗しました")
             else:
@@ -139,7 +138,7 @@ def update_preset(self, preset: Preset):
 
         return preset.id
 
-    def delete_preset(self, id: int):
+    def delete_preset(self, id: int) -> int:
         """
         指定したIDのプリセットの削除
         Parameters
diff --git a/voicevox_engine/setting/SettingLoader.py b/voicevox_engine/setting/SettingLoader.py
index 453b7a94d..9d0c30c94 100644
--- a/voicevox_engine/setting/SettingLoader.py
+++ b/voicevox_engine/setting/SettingLoader.py
@@ -28,13 +28,12 @@ def load_setting_file(self) -> Setting:
             # 指定された設定ファイルから値を取得
             setting = yaml.safe_load(self.setting_file_path.read_text(encoding="utf-8"))
 
-        setting = Setting(
-            cors_policy_mode=setting["cors_policy_mode"],
+        # FIXME: 型チェックと例外処理を追加する
+        return Setting(
+            cors_policy_mode=setting["cors_policy_mode"],  # type: ignore[arg-type]
             allow_origin=setting["allow_origin"],
         )
 
-        return setting
-
     def dump_setting_file(self, settings: Setting) -> None:
         settings_dict = settings.dict()
 
diff --git a/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py b/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
index 2b6cfeed3..aef7f5fad 100644
--- a/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
+++ b/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
@@ -25,7 +25,7 @@ def __init__(self, phoneme: str):
 
         self.phoneme = phoneme
 
-    def __eq__(self, o: object):
+    def __eq__(self, o: object):  # type:ignore[no-untyped-def]
         """Deprecated."""
         raise NotImplementedError
 
diff --git a/voicevox_engine/tts_pipeline/make_tts_engines.py b/voicevox_engine/tts_pipeline/make_tts_engines.py
index 006ef318a..bb1857468 100644
--- a/voicevox_engine/tts_pipeline/make_tts_engines.py
+++ b/voicevox_engine/tts_pipeline/make_tts_engines.py
@@ -80,7 +80,7 @@ def make_cores(
 
     if not enable_mock:
 
-        def load_core_library(core_dir: Path, suppress_error: bool = False):
+        def load_core_library(core_dir: Path, suppress_error: bool = False) -> None:
             """
             指定されたコアをロードし `cores` へ登録する。
             Parameters
diff --git a/voicevox_engine/tts_pipeline/text_analyzer.py b/voicevox_engine/tts_pipeline/text_analyzer.py
index c183d31fc..086a0e653 100644
--- a/voicevox_engine/tts_pipeline/text_analyzer.py
+++ b/voicevox_engine/tts_pipeline/text_analyzer.py
@@ -16,7 +16,7 @@ class Label:
     contexts: dict[str, str]  # ラベルの属性
 
     @classmethod
-    def from_feature(cls, feature: str):
+    def from_feature(cls, feature: str) -> Self:
         """OpenJTalk feature から Label インスタンスを生成する"""
         # フルコンテキストラベルの仕様は、http://hts.sp.nitech.ac.jp/?Download の HTS-2.3のJapanese tar.bz2 (126 MB)をダウンロードして、data/lab_format.pdfを見るとリストが見つかります。 # noqa
         contexts = re.search(
@@ -33,7 +33,7 @@ def from_feature(cls, feature: str):
             r"/J\:(?P<j1>.+?)\_(?P<j2>.+?)"
             r"/K\:(?P<k1>.+?)\+(?P<k2>.+?)\-(?P<k3>.+?)$",
             feature,
-        ).groupdict()
+        ).groupdict()  # type: ignore[union-attr]
         return cls(contexts=contexts)
 
     @property
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index 612d8b43d..9cc3afa76 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -1,6 +1,5 @@
 import copy
 import math
-from typing import List, Optional
 
 import numpy
 from numpy import ndarray
@@ -53,7 +52,9 @@ def to_flatten_phonemes(moras: list[Mora]) -> list[Phoneme]:
     return phonemes
 
 
-def split_mora(phoneme_list: List[Phoneme]):
+def split_mora(
+    phoneme_list: list[Phoneme],
+) -> tuple[list[Phoneme | None], list[Phoneme], list[int]]:
     """音素系列から子音系列・母音系列・母音位置を抽出する"""
     vowel_indexes = [
         i for i, p in enumerate(phoneme_list) if p.phoneme in mora_phoneme_list
@@ -64,7 +65,7 @@ def split_mora(phoneme_list: List[Phoneme]):
     # 1の場合はconsonant(子音)が存在しない=母音のみ(a/i/u/e/o/N/cl/pau)で構成されるモーラ(音)である
     # 2の場合はconsonantが存在するモーラである
     # なので、2の場合(else)でphonemeを取り出している
-    consonant_phoneme_list: List[Optional[Phoneme]] = [None] + [
+    consonant_phoneme_list = [None] + [
         None if post - prev == 1 else phoneme_list[post - 1]
         for prev, post in zip(vowel_indexes[:-1], vowel_indexes[1:])
     ]
@@ -147,12 +148,18 @@ def count_frame_per_unit(moras: list[Mora]) -> tuple[ndarray, ndarray]:
     frame_per_mora : ndarray
         モーラあたりのフレーム長。端数丸め。shape = (Mora,)
     """
-    frame_per_phoneme: list[ndarray] = []
-    frame_per_mora: list[ndarray] = []
+    frame_per_phoneme: list | ndarray = []
+    frame_per_mora: list | ndarray = []
     for mora in moras:
         vowel_frames = _to_frame(mora.vowel_length)
-        consonant_frames = _to_frame(mora.consonant_length) if mora.consonant else 0
-        mora_frames = vowel_frames + consonant_frames  # 音素ごとにフレーム長を算出し、和をモーラのフレーム長とする
+        consonant_frames = (
+            _to_frame(mora.consonant_length)  # type:ignore[arg-type]
+            if mora.consonant
+            else 0
+        )
+        mora_frames = (
+            vowel_frames + consonant_frames  # type:ignore[call-overload]
+        )  # 音素ごとにフレーム長を算出し、和をモーラのフレーム長とする
 
         if mora.consonant:
             frame_per_phoneme += [consonant_frames]
@@ -165,7 +172,7 @@ def count_frame_per_unit(moras: list[Mora]) -> tuple[ndarray, ndarray]:
     return frame_per_phoneme, frame_per_mora
 
 
-def _to_frame(sec: float) -> ndarray:
+def _to_frame(sec: float) -> numpy.int32:
     FRAMERATE = 93.75  # 24000 / 256 [frame/sec]
     # NOTE: `round` は偶数丸め。移植時に取扱い注意。詳細は voicevox_engine#552
     return numpy.round(sec * FRAMERATE).astype(numpy.int32)
@@ -282,8 +289,8 @@ def replace_phoneme_length(
         return accent_phrases
 
     def replace_mora_pitch(
-        self, accent_phrases: List[AccentPhrase], style_id: int
-    ) -> List[AccentPhrase]:
+        self, accent_phrases: list[AccentPhrase], style_id: int
+    ) -> list[AccentPhrase]:
         """
         accent_phrasesの音高(ピッチ)を設定する
         Parameters
@@ -306,7 +313,7 @@ def replace_mora_pitch(
         flatten_moras, phoneme_data_list = pre_process(accent_phrases)
 
         # accent
-        def _create_one_hot(accent_phrase: AccentPhrase, position: int):
+        def _create_one_hot(accent_phrase: AccentPhrase, position: int) -> ndarray:
             """
             単位行列(numpy.eye)を応用し、accent_phrase内でone hotな配列(リスト)を作る
             例えば、accent_phraseのmorasの長さが12、positionが1なら
diff --git a/voicevox_engine/user_dict.py b/voicevox_engine/user_dict.py
index a64cb2363..7de519c4d 100644
--- a/voicevox_engine/user_dict.py
+++ b/voicevox_engine/user_dict.py
@@ -9,7 +9,6 @@
 import numpy as np
 import pyopenjtalk
 from fastapi import HTTPException
-from pydantic import conint
 
 from .model import UserDictWord, WordTypes
 from .part_of_speech_data import MAX_PRIORITY, MIN_PRIORITY, part_of_speech_data
@@ -436,16 +435,18 @@ def _search_cost_candidates(context_id: int) -> List[int]:
     raise HTTPException(status_code=422, detail="品詞IDが不正です")
 
 
-def _cost2priority(context_id: int, cost: conint(ge=-32768, le=32767)) -> int:
+def _cost2priority(context_id: int, cost: int) -> int:
+    assert -32768 <= cost <= 32767
     cost_candidates = _search_cost_candidates(context_id)
     # cost_candidatesの中にある値で最も近い値を元にpriorityを返す
     # 参考: https://qiita.com/Krypf/items/2eada91c37161d17621d
     # この関数とpriority2cost関数によって、辞書ファイルのcostを操作しても最も近いpriorityのcostに上書きされる
-    return MAX_PRIORITY - np.argmin(np.abs(np.array(cost_candidates) - cost))
+    return MAX_PRIORITY - np.argmin(
+        np.abs(np.array(cost_candidates) - cost)
+    )  # type:ignore [return-value]
 
 
-def _priority2cost(
-    context_id: int, priority: conint(ge=MIN_PRIORITY, le=MAX_PRIORITY)
-) -> int:
+def _priority2cost(context_id: int, priority: int) -> int:
+    assert MIN_PRIORITY <= priority <= MAX_PRIORITY
     cost_candidates = _search_cost_candidates(context_id)
     return cost_candidates[MAX_PRIORITY - priority]
diff --git a/voicevox_engine/utility/core_version_utility.py b/voicevox_engine/utility/core_version_utility.py
index 9811c25f0..a98febd54 100644
--- a/voicevox_engine/utility/core_version_utility.py
+++ b/voicevox_engine/utility/core_version_utility.py
@@ -1,4 +1,4 @@
-from typing import Sequence
+from collections.abc import Collection
 
 from semver.version import Version
 
@@ -7,7 +7,7 @@ def parse_core_version(version: str) -> Version:
     return Version.parse(version)
 
 
-def get_latest_core_version(versions: Sequence[str]) -> str:
+def get_latest_core_version(versions: Collection[str]) -> str:
     if len(versions) == 0:
         raise Exception("versions must be non-empty.")
 
diff --git a/voicevox_engine/utility/mutex_utility.py b/voicevox_engine/utility/mutex_utility.py
index 09d8cb968..def3a846f 100644
--- a/voicevox_engine/utility/mutex_utility.py
+++ b/voicevox_engine/utility/mutex_utility.py
@@ -1,7 +1,11 @@
 import threading
+from collections.abc import Callable
+from typing import Any, TypeVar
 
+F = TypeVar("F", bound=Callable[..., Any])
 
-def mutex_wrapper(lock: threading.Lock):
+
+def mutex_wrapper(lock: threading.Lock) -> Callable[[F], F]:
     def wrap(f):
         def func(*args, **kw):
             lock.acquire()

From 2053549cbaf56b663bed28fea2d69174849a8dcd Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Fri, 29 Dec 2023 22:19:34 +0900
Subject: [PATCH 086/177] =?UTF-8?q?mypy=E7=94=A8=E3=81=AEignore=E3=82=92?=
 =?UTF-8?q?=E3=81=84=E3=81=A3=E3=81=B1=E3=81=84=E5=A4=B1=E3=81=8F=E3=81=99?=
 =?UTF-8?q?=20(#946)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

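Remove mypy `# type: ignore` comments by fixing the underlying types.

* core_wrapper: return each architecture name as an explicit literal and
  accept "universal" as an `arch_name` in `_get_core_name`
* text_analyzer: raise ValueError when `re.search` finds no match instead
  of calling `.groupdict()` on a possible None
* tts_engine, user_dict: convert numpy scalars to Python ints with `.item()`
* acoustic_feature_extractor: annotate `__eq__` as returning bool
* model, SettingLoader: drop a mismatched annotation and an unneeded ignore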
---
 voicevox_engine/core_wrapper.py               | 13 +++++++-----
 voicevox_engine/model.py                      |  2 +-
 voicevox_engine/setting/SettingLoader.py      |  4 ++--
 .../acoustic_feature_extractor.py             |  2 +-
 voicevox_engine/tts_pipeline/text_analyzer.py |  8 +++++--
 voicevox_engine/tts_pipeline/tts_engine.py    | 21 +++++++------------
 voicevox_engine/user_dict.py                  |  4 +---
 7 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/voicevox_engine/core_wrapper.py b/voicevox_engine/core_wrapper.py
index 6c74e6bb6..6ba4eeadc 100644
--- a/voicevox_engine/core_wrapper.py
+++ b/voicevox_engine/core_wrapper.py
@@ -245,23 +245,26 @@ def _get_arch_name() -> Literal["x64", "x86", "aarch64", "armv7l"] | None:
         return "x86"
     elif machine == "arm64":
         return "aarch64"
-    elif machine in ["armv7l", "aarch64"]:
-        return machine  # type: ignore[return-value]
+    elif machine == "aarch64":
+        return "aarch64"
+    elif machine == "armv7l":
+        return "armv7l"
     else:
         return None
 
 
 def _get_core_name(
-    arch_name: Literal["x64", "x86", "aarch64", "armv7l"] | str,
+    arch_name: Literal["x64", "x86", "aarch64", "armv7l", "universal"],
     platform_name: str,
     model_type: Literal["libtorch", "onnxruntime"],
     gpu_type: GPUType,
 ) -> str | None:
     """
-    設定値を満たすCoreの名前（None: サポート外）
+    設定値を満たすCoreの名前（None: サポート外）。
+    macOSの場合はarch_nameをuniversalにする。
     Parameters
     ----------
-    arch_name : Literal["x64", "x86", "aarch64", "armv7l"]
+    arch_name : Literal["x64", "x86", "aarch64", "armv7l", "universal"]
         実行中マシンのアーキテクチャ
     platform_name : str
         実行中マシンのシステム名
diff --git a/voicevox_engine/model.py b/voicevox_engine/model.py
index d5477a131..d8951b0be 100644
--- a/voicevox_engine/model.py
+++ b/voicevox_engine/model.py
@@ -83,7 +83,7 @@ class ParseKanaError(Exception):
     def __init__(self, errcode: ParseKanaErrorCode, **kwargs: Any) -> None:
         self.errcode = errcode
         self.errname = errcode.name
-        self.kwargs: Dict[str, str] = kwargs
+        self.kwargs = kwargs
         err_fmt: str = errcode.value
         self.text = err_fmt.format(**kwargs)
 
diff --git a/voicevox_engine/setting/SettingLoader.py b/voicevox_engine/setting/SettingLoader.py
index 9d0c30c94..2a22e025b 100644
--- a/voicevox_engine/setting/SettingLoader.py
+++ b/voicevox_engine/setting/SettingLoader.py
@@ -26,11 +26,11 @@ def load_setting_file(self) -> Setting:
             setting = {"allow_origin": None, "cors_policy_mode": "localapps"}
         else:
             # 指定された設定ファイルから値を取得
+            # FIXME: 型チェックと例外処理を追加する
             setting = yaml.safe_load(self.setting_file_path.read_text(encoding="utf-8"))
 
-        # FIXME: 型チェックと例外処理を追加する
         return Setting(
-            cors_policy_mode=setting["cors_policy_mode"],  # type: ignore[arg-type]
+            cors_policy_mode=setting["cors_policy_mode"],
             allow_origin=setting["allow_origin"],
         )
 
diff --git a/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py b/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
index aef7f5fad..1c6506a56 100644
--- a/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
+++ b/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
@@ -25,7 +25,7 @@ def __init__(self, phoneme: str):
 
         self.phoneme = phoneme
 
-    def __eq__(self, o: object):  # type:ignore[no-untyped-def]
+    def __eq__(self, o: object) -> bool:
         """Deprecated."""
         raise NotImplementedError
 
diff --git a/voicevox_engine/tts_pipeline/text_analyzer.py b/voicevox_engine/tts_pipeline/text_analyzer.py
index 086a0e653..cd91783e7 100644
--- a/voicevox_engine/tts_pipeline/text_analyzer.py
+++ b/voicevox_engine/tts_pipeline/text_analyzer.py
@@ -19,7 +19,7 @@ class Label:
     def from_feature(cls, feature: str) -> Self:
         """OpenJTalk feature から Label インスタンスを生成する"""
         # フルコンテキストラベルの仕様は、http://hts.sp.nitech.ac.jp/?Download の HTS-2.3のJapanese tar.bz2 (126 MB)をダウンロードして、data/lab_format.pdfを見るとリストが見つかります。 # noqa
-        contexts = re.search(
+        result = re.search(
             r"^(?P<p1>.+?)\^(?P<p2>.+?)\-(?P<p3>.+?)\+(?P<p4>.+?)\=(?P<p5>.+?)"
             r"/A\:(?P<a1>.+?)\+(?P<a2>.+?)\+(?P<a3>.+?)"
             r"/B\:(?P<b1>.+?)\-(?P<b2>.+?)\_(?P<b3>.+?)"
@@ -33,7 +33,11 @@ def from_feature(cls, feature: str) -> Self:
             r"/J\:(?P<j1>.+?)\_(?P<j2>.+?)"
             r"/K\:(?P<k1>.+?)\+(?P<k2>.+?)\-(?P<k3>.+?)$",
             feature,
-        ).groupdict()  # type: ignore[union-attr]
+        )
+        if result is None:
+            raise ValueError(feature)
+
+        contexts = result.groupdict()
         return cls(contexts=contexts)
 
     @property
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index 9cc3afa76..7eca7619f 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -148,34 +148,27 @@ def count_frame_per_unit(moras: list[Mora]) -> tuple[ndarray, ndarray]:
     frame_per_mora : ndarray
         モーラあたりのフレーム長。端数丸め。shape = (Mora,)
     """
-    frame_per_phoneme: list | ndarray = []
-    frame_per_mora: list | ndarray = []
+    frame_per_phoneme: list[int] = []
+    frame_per_mora: list[int] = []
     for mora in moras:
         vowel_frames = _to_frame(mora.vowel_length)
         consonant_frames = (
-            _to_frame(mora.consonant_length)  # type:ignore[arg-type]
-            if mora.consonant
-            else 0
+            _to_frame(mora.consonant_length) if mora.consonant_length is not None else 0
         )
-        mora_frames = (
-            vowel_frames + consonant_frames  # type:ignore[call-overload]
-        )  # 音素ごとにフレーム長を算出し、和をモーラのフレーム長とする
+        mora_frames = vowel_frames + consonant_frames  # 音素ごとにフレーム長を算出し、和をモーラのフレーム長とする
 
         if mora.consonant:
             frame_per_phoneme += [consonant_frames]
         frame_per_phoneme += [vowel_frames]
         frame_per_mora += [mora_frames]
 
-    frame_per_phoneme = numpy.array(frame_per_phoneme)
-    frame_per_mora = numpy.array(frame_per_mora)
+    return numpy.array(frame_per_phoneme), numpy.array(frame_per_mora)
 
-    return frame_per_phoneme, frame_per_mora
 
-
-def _to_frame(sec: float) -> numpy.int32:
+def _to_frame(sec: float) -> int:
     FRAMERATE = 93.75  # 24000 / 256 [frame/sec]
     # NOTE: `round` は偶数丸め。移植時に取扱い注意。詳細は voicevox_engine#552
-    return numpy.round(sec * FRAMERATE).astype(numpy.int32)
+    return numpy.round(sec * FRAMERATE).astype(numpy.int32).item()
 
 
 def apply_pitch_scale(moras: list[Mora], query: AudioQuery) -> list[Mora]:
diff --git a/voicevox_engine/user_dict.py b/voicevox_engine/user_dict.py
index 7de519c4d..e931f557c 100644
--- a/voicevox_engine/user_dict.py
+++ b/voicevox_engine/user_dict.py
@@ -441,9 +441,7 @@ def _cost2priority(context_id: int, cost: int) -> int:
     # cost_candidatesの中にある値で最も近い値を元にpriorityを返す
     # 参考: https://qiita.com/Krypf/items/2eada91c37161d17621d
     # この関数とpriority2cost関数によって、辞書ファイルのcostを操作しても最も近いpriorityのcostに上書きされる
-    return MAX_PRIORITY - np.argmin(
-        np.abs(np.array(cost_candidates) - cost)
-    )  # type:ignore [return-value]
+    return MAX_PRIORITY - np.argmin(np.abs(np.array(cost_candidates) - cost)).item()
 
 
 def _priority2cost(context_id: int, priority: int) -> int:

From f203bab8974328377622fc5570c5e2ccfdc4e712 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sat, 30 Dec 2023 23:23:01 +0900
Subject: [PATCH 087/177] =?UTF-8?q?=E5=BB=83=E6=AD=A2:=20`TTSEngineBase`?=
 =?UTF-8?q?=20(#950)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

remove: `TTSEngineBase` の廃止
---
 run.py                                        |   6 +-
 voicevox_engine/metas/MetasStore.py           |   2 +-
 voicevox_engine/tts_pipeline/__init__.py      |   2 -
 voicevox_engine/tts_pipeline/tts_engine.py    |  33 +++++-
 .../tts_pipeline/tts_engine_base.py           | 104 ------------------
 5 files changed, 31 insertions(+), 116 deletions(-)
 delete mode 100644 voicevox_engine/tts_pipeline/tts_engine_base.py

diff --git a/run.py b/run.py
index 1d94100a4..6f3de1168 100644
--- a/run.py
+++ b/run.py
@@ -68,7 +68,7 @@
     SettingLoader,
 )
 from voicevox_engine.tts_pipeline import (
-    TTSEngineBase,
+    TTSEngine,
     make_cores,
     make_tts_engines_from_cores,
 )
@@ -145,7 +145,7 @@ def set_output_log_utf8() -> None:
 
 
 def generate_app(
-    synthesis_engines: Dict[str, TTSEngineBase],
+    synthesis_engines: Dict[str, TTSEngine],
     cores: Dict[str, CoreAdapter],
     latest_core_version: str,
     setting_loader: SettingLoader,
@@ -248,7 +248,7 @@ def check_disabled_mutable_api():
     def apply_user_dict():
         update_dict()
 
-    def get_engine(core_version: Optional[str]) -> TTSEngineBase:
+    def get_engine(core_version: Optional[str]) -> TTSEngine:
         if core_version is None:
             return synthesis_engines[latest_core_version]
         if core_version in synthesis_engines:
diff --git a/voicevox_engine/metas/MetasStore.py b/voicevox_engine/metas/MetasStore.py
index 3bdc365c4..bd24fc209 100644
--- a/voicevox_engine/metas/MetasStore.py
+++ b/voicevox_engine/metas/MetasStore.py
@@ -34,7 +34,7 @@ def __init__(self, engine_speakers_path: Path) -> None:
         }
 
     # FIXME: engineではなくList[CoreSpeaker]を渡す形にすることで
-    # TTSEngineBaseによる循環importを修正する
+    # TTSEngineによる循環importを修正する
     def load_combined_metas(self, core: "CoreAdapter") -> List[Speaker]:
         """
         コアに含まれる話者メタ情報とエンジンに含まれる話者メタ情報を統合
diff --git a/voicevox_engine/tts_pipeline/__init__.py b/voicevox_engine/tts_pipeline/__init__.py
index 6a2aea152..110256fc4 100644
--- a/voicevox_engine/tts_pipeline/__init__.py
+++ b/voicevox_engine/tts_pipeline/__init__.py
@@ -1,7 +1,6 @@
 from ..core_wrapper import CoreWrapper, load_runtime_lib
 from .make_tts_engines import make_cores
 from .tts_engine import TTSEngine, make_tts_engines_from_cores
-from .tts_engine_base import TTSEngineBase
 
 __all__ = [
     "CoreWrapper",
@@ -9,5 +8,4 @@
     "make_cores",
     "make_tts_engines_from_cores",
     "TTSEngine",
-    "TTSEngineBase",
 ]
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index 7eca7619f..a49ef505b 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -10,7 +10,7 @@
 from ..model import AccentPhrase, AudioQuery, Mora
 from .acoustic_feature_extractor import Phoneme
 from .mora_list import openjtalk_mora2text
-from .tts_engine_base import TTSEngineBase
+from .text_analyzer import text_to_accent_phrases
 
 unvoiced_mora_phoneme_list = ["A", "I", "U", "E", "O", "cl", "pau"]
 mora_phoneme_list = ["a", "i", "u", "e", "o", "N"] + unvoiced_mora_phoneme_list
@@ -243,7 +243,7 @@ def raw_wave_to_output_wave(query: AudioQuery, wave: ndarray, sr_wave: int) -> n
     return wave
 
 
-class TTSEngine(TTSEngineBase):
+class TTSEngine:
     """音声合成器（core）の管理/実行/プロキシと音声合成フロー"""
 
     def __init__(self, core: CoreWrapper):
@@ -419,6 +419,29 @@ def _create_one_hot(accent_phrase: AccentPhrase, position: int) -> ndarray:
 
         return accent_phrases
 
+    def replace_mora_data(
+        self, accent_phrases: list[AccentPhrase], style_id: int
+    ) -> list[AccentPhrase]:
+        """アクセント句系列の音素長・モーラ音高をスタイルIDに基づいて更新する"""
+        return self.replace_mora_pitch(
+            accent_phrases=self.replace_phoneme_length(
+                accent_phrases=accent_phrases, style_id=style_id
+            ),
+            style_id=style_id,
+        )
+
+    def create_accent_phrases(self, text: str, style_id: int) -> list[AccentPhrase]:
+        """テキストからアクセント句系列を生成し、スタイルIDに基づいてその音素長・モーラ音高を更新する"""
+        # 音素とアクセントの推定
+        accent_phrases = text_to_accent_phrases(text)
+
+        # 音素長・モーラ音高の推定と更新
+        accent_phrases = self.replace_mora_data(
+            accent_phrases=accent_phrases,
+            style_id=style_id,
+        )
+        return accent_phrases
+
     def synthesis(
         self,
         query: AudioQuery,
@@ -438,13 +461,11 @@ def synthesis(
         return wave
 
 
-def make_tts_engines_from_cores(
-    cores: dict[str, CoreAdapter]
-) -> dict[str, TTSEngineBase]:
+def make_tts_engines_from_cores(cores: dict[str, CoreAdapter]) -> dict[str, TTSEngine]:
     """コア一覧からTTSエンジン一覧を生成する"""
     # FIXME: `MOCK_VER` を循環 import 無しに `make_cores()` 関連モジュールから import する
     MOCK_VER = "0.0.0"
-    tts_engines: dict[str, TTSEngineBase] = {}
+    tts_engines: dict[str, TTSEngine] = {}
     for ver, core in cores.items():
         if ver == MOCK_VER:
             from ..dev.synthesis_engine import MockTTSEngine
diff --git a/voicevox_engine/tts_pipeline/tts_engine_base.py b/voicevox_engine/tts_pipeline/tts_engine_base.py
deleted file mode 100644
index 77c486056..000000000
--- a/voicevox_engine/tts_pipeline/tts_engine_base.py
+++ /dev/null
@@ -1,104 +0,0 @@
-from abc import ABCMeta, abstractmethod
-from typing import List
-
-import numpy as np
-
-from ..model import AccentPhrase, AudioQuery
-from .text_analyzer import text_to_accent_phrases
-
-
-class TTSEngineBase(metaclass=ABCMeta):
-    @abstractmethod
-    def replace_phoneme_length(
-        self, accent_phrases: List[AccentPhrase], style_id: int
-    ) -> List[AccentPhrase]:
-        """
-        音素長の更新
-        Parameters
-        ----------
-        accent_phrases : List[AccentPhrase]
-            アクセント句系列
-        style_id : int
-            スタイルID
-        Returns
-        -------
-        accent_phrases : List[AccentPhrase]
-            音素長が更新されたアクセント句系列
-        """
-        raise NotImplementedError()
-
-    @abstractmethod
-    def replace_mora_pitch(
-        self, accent_phrases: List[AccentPhrase], style_id: int
-    ) -> List[AccentPhrase]:
-        """
-        モーラ音高の更新
-        Parameters
-        ----------
-        accent_phrases : List[AccentPhrase]
-            アクセント句系列
-        style_id : int
-            スタイルID
-        Returns
-        -------
-        accent_phrases : List[AccentPhrase]
-            モーラ音高が更新されたアクセント句系列
-        """
-        raise NotImplementedError()
-
-    def replace_mora_data(
-        self, accent_phrases: List[AccentPhrase], style_id: int
-    ) -> List[AccentPhrase]:
-        """
-        音素長・モーラ音高の更新
-        Parameters
-        ----------
-        accent_phrases : List[AccentPhrase]
-            アクセント句系列
-        style_id : int
-            スタイルID
-        Returns
-        -------
-        accent_phrases : List[AccentPhrase]
-            アクセント句系列
-        """
-        return self.replace_mora_pitch(
-            accent_phrases=self.replace_phoneme_length(
-                accent_phrases=accent_phrases, style_id=style_id
-            ),
-            style_id=style_id,
-        )
-
-    def create_accent_phrases(self, text: str, style_id: int) -> List[AccentPhrase]:
-        """
-        テキストからアクセント句系列を生成。
-        音素長やモーラ音高も更新。
-        Parameters
-        ----------
-        text : str
-            日本語テキスト
-        style_id : int
-            スタイルID
-        Returns
-        -------
-        accent_phrases : List[AccentPhrase]
-            アクセント句系列
-        """
-        # 音素とアクセントの推定
-        accent_phrases = text_to_accent_phrases(text)
-
-        # 音素長・モーラ音高の推定と更新
-        accent_phrases = self.replace_mora_data(
-            accent_phrases=accent_phrases,
-            style_id=style_id,
-        )
-        return accent_phrases
-
-    def synthesis(
-        self,
-        query: AudioQuery,
-        style_id: int,
-        enable_interrogative_upspeak: bool = True,
-    ) -> np.ndarray:
-        """音声合成用のクエリ・スタイルID・疑問文語尾自動調整フラグに基づいて音声波形を生成する"""
-        raise NotImplementedError()

From 1e124ca5f43793104e2a482f7dd6f40835fd8291 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sun, 31 Dec 2023 00:43:42 +0900
Subject: [PATCH 088/177] =?UTF-8?q?=E8=BF=BD=E5=8A=A0:=20`MockCoreWrapper`?=
 =?UTF-8?q?=20=E5=85=A5=E5=8A=9B=E5=8F=8D=E6=98=A0=E5=87=BA=E5=8A=9B=20(#9?=
 =?UTF-8?q?51)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add: `MockCoreWrapper` 入力反映出力

* refactor: dead code 削除
---
 test/test_synthesis_engine_base.py | 104 ++---------------------------
 voicevox_engine/dev/core/mock.py   |  49 +++++++++++---
 2 files changed, 43 insertions(+), 110 deletions(-)

diff --git a/test/test_synthesis_engine_base.py b/test/test_synthesis_engine_base.py
index a3fddd31b..730879434 100644
--- a/test/test_synthesis_engine_base.py
+++ b/test/test_synthesis_engine_base.py
@@ -1,78 +1,13 @@
-from typing import List, Union
 from unittest import TestCase
-from unittest.mock import Mock
 
-import numpy
-
-from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
+from voicevox_engine.dev.core.mock import MockCoreWrapper
+from voicevox_engine.model import AccentPhrase, Mora
 from voicevox_engine.tts_pipeline import TTSEngine
 from voicevox_engine.tts_pipeline.tts_engine import (
     apply_interrogative_upspeak,  # FIXME: この関数を使うテストをTTSEngine用のテストに移動する
 )
 
 
-def yukarin_s_mock(length: int, phoneme_list: numpy.ndarray, style_id: numpy.ndarray):
-    result = []
-    # mockとしての適当な処理、特に意味はない
-    for i in range(length):
-        result.append(round((phoneme_list[i] * 0.0625 + style_id).item(), 2))
-    return numpy.array(result)
-
-
-def yukarin_sa_mock(
-    length: int,
-    vowel_phoneme_list: numpy.ndarray,
-    consonant_phoneme_list: numpy.ndarray,
-    start_accent_list: numpy.ndarray,
-    end_accent_list: numpy.ndarray,
-    start_accent_phrase_list: numpy.ndarray,
-    end_accent_phrase_list: numpy.ndarray,
-    style_id: numpy.ndarray,
-):
-    result = []
-    # mockとしての適当な処理、特に意味はない
-    for i in range(length):
-        result.append(
-            round(
-                (
-                    (
-                        vowel_phoneme_list[0][i]
-                        + consonant_phoneme_list[0][i]
-                        + start_accent_list[0][i]
-                        + end_accent_list[0][i]
-                        + start_accent_phrase_list[0][i]
-                        + end_accent_phrase_list[0][i]
-                    )
-                    * 0.0625
-                    + style_id
-                ).item(),
-                2,
-            )
-        )
-    return numpy.array(result)[numpy.newaxis]
-
-
-def decode_mock(
-    length: int,
-    phoneme_size: int,
-    f0: numpy.ndarray,
-    phoneme: numpy.ndarray,
-    style_id: Union[numpy.ndarray, int],
-):
-    result = []
-    # mockとしての適当な処理、特に意味はない
-    for i in range(length):
-        # decode forwardはデータサイズがlengthの256倍になるのでとりあえず256回データをresultに入れる
-        for _ in range(256):
-            result.append(
-                (
-                    f0[i][0] * (numpy.where(phoneme[i] == 1)[0] / phoneme_size)
-                    + style_id
-                ).item()
-            )
-    return numpy.array(result)
-
-
 def koreha_arimasuka_base_expected():
     return [
         AccentPhrase(
@@ -156,46 +91,15 @@ def koreha_arimasuka_base_expected():
     ]
 
 
-def create_mock_query(accent_phrases):
-    return AudioQuery(
-        accent_phrases=accent_phrases,
-        speedScale=1,
-        pitchScale=0,
-        intonationScale=1,
-        volumeScale=1,
-        prePhonemeLength=0.1,
-        postPhonemeLength=0.1,
-        outputSamplingRate=24000,
-        outputStereo=False,
-        kana="",
-    )
-
-
-class MockCore:
-    default_sampling_rate = 24000
-    yukarin_s_forward = Mock(side_effect=yukarin_s_mock)
-    yukarin_sa_forward = Mock(side_effect=yukarin_sa_mock)
-    decode_forward = Mock(side_effect=decode_mock)
-
-    def metas(self):
-        return ""
-
-    def supported_devices(self):
-        return ""
-
-    def is_model_loaded(self, style_id):
-        return True
-
-
 class TestTTSEngineBase(TestCase):
     def setUp(self):
         super().setUp()
-        self.synthesis_engine = TTSEngine(core=MockCore())
+        self.synthesis_engine = TTSEngine(core=MockCoreWrapper())
 
     def create_synthesis_test_base(
         self,
         text: str,
-        expected: List[AccentPhrase],
+        expected: list[AccentPhrase],
         enable_interrogative_upspeak: bool,
     ):
         """音声合成時に疑問文モーラ処理を行っているかどうかを検証
diff --git a/voicevox_engine/dev/core/mock.py b/voicevox_engine/dev/core/mock.py
index 07c9202f0..357276bb8 100644
--- a/voicevox_engine/dev/core/mock.py
+++ b/voicevox_engine/dev/core/mock.py
@@ -67,8 +67,11 @@ def yukarin_s_forward(
         self, length: int, phoneme_list: ndarray, style_id: ndarray
     ) -> ndarray:
         """音素系列サイズ・音素ID系列・スタイルIDから音素長系列を生成する"""
-        # Mock: 定数の音素長系列を生成。[0.1, 0.1, ...]
-        return 0.1 * numpy.ones((length,), dtype=numpy.float32)
+        result = []
+        # mockとしての適当な処理、特に意味はない
+        for i in range(length):
+            result.append(round((phoneme_list[i] * 0.0625 + style_id).item(), 2))
+        return numpy.array(result)
 
     def yukarin_sa_forward(
         self,
@@ -83,12 +86,28 @@ def yukarin_sa_forward(
     ) -> ndarray:
         """モーラ系列サイズ・母音系列・子音系列・アクセント位置・アクセント句区切り・スタイルIDからモーラ音高系列を生成する"""
         assert length > 1, "前後無音を必ず付与しなければならない"
-        # Mock: 定数のモーラ音高系列を生成。[0, 200, 100, 100, ..., 100, 0]
-        pitch = 100 * numpy.ones((1, length), dtype=numpy.float32)
-        pitch[0, 0] = 0.0  # 開始無音 (pau)
-        pitch[0, 1] = 200.0  # 分散 0 を避けるため
-        pitch[0, length - 1] = 0.0  # 終了無音 (pau)
-        return pitch
+
+        result = []
+        # mockとしての適当な処理、特に意味はない
+        for i in range(length):
+            result.append(
+                round(
+                    (
+                        (
+                            vowel_phoneme_list[0][i]
+                            + consonant_phoneme_list[0][i]
+                            + start_accent_list[0][i]
+                            + end_accent_list[0][i]
+                            + start_accent_phrase_list[0][i]
+                            + end_accent_phrase_list[0][i]
+                        )
+                        * 0.0625
+                        + style_id
+                    ).item(),
+                    2,
+                )
+            )
+        return numpy.array(result)[numpy.newaxis]
 
     def decode_forward(
         self,
@@ -99,8 +118,18 @@ def decode_forward(
         style_id: ndarray,
     ) -> ndarray:
         """フレーム長・音素種類数・フレーム音高・フレーム音素onehot・スタイルIDから音声波形を生成する"""
-        # Mock: 定数の音声波形を生成。[0.1, 0.1, ..., 0.1, 0.1]
-        return 0.1 * numpy.ones((length * 256,), dtype=numpy.float32)
+        result = []
+        # mockとしての適当な処理、特に意味はない
+        for i in range(length):
+            # decode forwardはデータサイズがlengthの256倍になるのでとりあえず256回データをresultに入れる
+            for _ in range(256):
+                result.append(
+                    (
+                        f0[i][0] * (numpy.where(phoneme[i] == 1)[0] / phoneme_size)
+                        + style_id
+                    ).item()
+                )
+        return numpy.array(result)
 
     def supported_devices(self):
         return json.dumps(

From 141593b3b4c265302eb2e61d26dd5cf3e89b88bc Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sun, 31 Dec 2023 10:20:15 +0900
Subject: [PATCH 089/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`mora=5Flist`=20?=
 =?UTF-8?q?=E7=84=A1=E5=AD=90=E9=9F=B3=E8=A1=A8=E7=8F=BE=20(#953)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

refactor: `mora_list` 無子音表現
---
 .../tts_pipeline/kana_converter.py            |  8 ++---
 voicevox_engine/tts_pipeline/mora_list.py     | 36 +++++++++----------
 2 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/voicevox_engine/tts_pipeline/kana_converter.py b/voicevox_engine/tts_pipeline/kana_converter.py
index c7367358f..abcac68e6 100644
--- a/voicevox_engine/tts_pipeline/kana_converter.py
+++ b/voicevox_engine/tts_pipeline/kana_converter.py
@@ -33,8 +33,8 @@
 for text, (consonant, vowel) in openjtalk_text2mora.items():
     _text2mora_with_unvoice[text] = Mora(
         text=text,
-        consonant=consonant if len(consonant) > 0 else None,
-        consonant_length=0 if len(consonant) > 0 else None,
+        consonant=consonant,
+        consonant_length=0 if consonant else None,
         vowel=vowel,
         vowel_length=0,
         pitch=0,
@@ -44,8 +44,8 @@
         # 例: "_ホ" -> "hO"
         _text2mora_with_unvoice[_UNVOICE_SYMBOL + text] = Mora(
             text=text,
-            consonant=consonant if len(consonant) > 0 else None,
-            consonant_length=0 if len(consonant) > 0 else None,
+            consonant=consonant,
+            consonant_length=0 if consonant else None,
             vowel=vowel.upper(),
             vowel_length=0,
             pitch=0,
diff --git a/voicevox_engine/tts_pipeline/mora_list.py b/voicevox_engine/tts_pipeline/mora_list.py
index b884f6dee..1f96658a8 100644
--- a/voicevox_engine/tts_pipeline/mora_list.py
+++ b/voicevox_engine/tts_pipeline/mora_list.py
@@ -42,13 +42,13 @@
 POSSIBILITY OF SUCH DAMAGE.
 """
 
-_mora_list_minimum: list[tuple[str, str, str]] = [
+_mora_list_minimum: list[tuple[str, str | None, str]] = [
     ("ヴォ", "v", "o"),
     ("ヴェ", "v", "e"),
     ("ヴィ", "v", "i"),
     ("ヴァ", "v", "a"),
     ("ヴ", "v", "u"),
-    ("ン", "", "N"),
+    ("ン", None, "N"),
     ("ワ", "w", "a"),
     ("ロ", "r", "o"),
     ("レ", "r", "e"),
@@ -131,7 +131,7 @@
     ("ツィ", "ts", "i"),
     ("ツァ", "ts", "a"),
     ("ツ", "ts", "u"),
-    ("ッ", "", "cl"),
+    ("ッ", None, "cl"),
     ("チョ", "ch", "o"),
     ("チュ", "ch", "u"),
     ("チャ", "ch", "a"),
@@ -179,23 +179,23 @@
     ("キ", "k", "i"),
     ("ガ", "g", "a"),
     ("カ", "k", "a"),
-    ("オ", "", "o"),
-    ("エ", "", "e"),
+    ("オ", None, "o"),
+    ("エ", None, "e"),
     ("ウォ", "w", "o"),
     ("ウェ", "w", "e"),
     ("ウィ", "w", "i"),
-    ("ウ", "", "u"),
+    ("ウ", None, "u"),
     ("イェ", "y", "e"),
-    ("イ", "", "i"),
-    ("ア", "", "a"),
+    ("イ", None, "i"),
+    ("ア", None, "a"),
 ]
-_mora_list_additional: list[tuple[str, str, str]] = [
+_mora_list_additional: list[tuple[str, str | None, str]] = [
     ("ヴョ", "by", "o"),
     ("ヴュ", "by", "u"),
     ("ヴャ", "by", "a"),
-    ("ヲ", "", "o"),
-    ("ヱ", "", "e"),
-    ("ヰ", "", "i"),
+    ("ヲ", None, "o"),
+    ("ヱ", None, "e"),
+    ("ヰ", None, "i"),
     ("ヮ", "w", "a"),
     ("ョ", "y", "o"),
     ("ュ", "y", "u"),
@@ -203,15 +203,15 @@
     ("ヂ", "j", "i"),
     ("ヶ", "k", "e"),
     ("ャ", "y", "a"),
-    ("ォ", "", "o"),
-    ("ェ", "", "e"),
-    ("ゥ", "", "u"),
-    ("ィ", "", "i"),
-    ("ァ", "", "a"),
+    ("ォ", None, "o"),
+    ("ェ", None, "e"),
+    ("ゥ", None, "u"),
+    ("ィ", None, "i"),
+    ("ァ", None, "a"),
 ]
 
 openjtalk_mora2text = {
-    consonant + vowel: text for [text, consonant, vowel] in _mora_list_minimum
+    (consonant or "") + vowel: text for [text, consonant, vowel] in _mora_list_minimum
 }
 openjtalk_text2mora = {
     text: (consonant, vowel)

From 641240f774394520fa237d84f38294a96a4210fa Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sun, 31 Dec 2023 10:21:45 +0900
Subject: [PATCH 090/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E3=83=A2?=
 =?UTF-8?q?=E3=83=83=E3=82=AF=20`decode=5Fforward()`=20=E7=B0=A1=E7=95=A5?=
 =?UTF-8?q?=E5=8C=96=20(#955)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

refactor: モック `decode_forward` 簡略化
---
 voicevox_engine/dev/core/mock.py | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/voicevox_engine/dev/core/mock.py b/voicevox_engine/dev/core/mock.py
index 357276bb8..31918e0f3 100644
--- a/voicevox_engine/dev/core/mock.py
+++ b/voicevox_engine/dev/core/mock.py
@@ -117,18 +117,13 @@ def decode_forward(
         phoneme: ndarray,
         style_id: ndarray,
     ) -> ndarray:
-        """フレーム長・音素種類数・フレーム音高・フレーム音素onehot・スタイルIDから音声波形を生成する"""
-        result = []
-        # mockとしての適当な処理、特に意味はない
+        """フレーム長・音素種類数・フレーム音高・フレーム音素onehot・スタイルIDからダミー音声波形を生成する"""
+        # 入力値を反映し、長さが 256 倍であるダミー配列を出力する
+        result: list[ndarray] = []
         for i in range(length):
-            # decode forwardはデータサイズがlengthの256倍になるのでとりあえず256回データをresultに入れる
-            for _ in range(256):
-                result.append(
-                    (
-                        f0[i][0] * (numpy.where(phoneme[i] == 1)[0] / phoneme_size)
-                        + style_id
-                    ).item()
-                )
+            result += [
+                (f0[i, 0] * (numpy.where(phoneme[i] == 1)[0] / phoneme_size) + style_id)
+            ] * 256
         return numpy.array(result)
 
     def supported_devices(self):

From da256e9c5fde16afca03f8dceb8e8ff29f2e9780 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sun, 31 Dec 2023 10:22:58 +0900
Subject: [PATCH 091/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E9=9F=B3?=
 =?UTF-8?q?=E7=B4=A0=E9=95=B7=E3=83=BB=E3=83=94=E3=83=83=E3=83=81=E3=83=86?=
 =?UTF-8?q?=E3=82=B9=E3=83=88=E3=81=AE=E6=A7=8B=E9=80=A0=E5=8C=96=20(#952)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: 間接出力テスト構造化

* refactor: `place_phoneme_length` テスト構造化

* fix: lint

* refactor: `test_replace_mora_pitch` 構造化

* fix: 名前付き引数
---
 test/test_synthesis_engine.py | 110 ++++++++++++++++------------------
 1 file changed, 51 insertions(+), 59 deletions(-)

diff --git a/test/test_synthesis_engine.py b/test/test_synthesis_engine.py
index b67d87cad..92eacb14e 100644
--- a/test/test_synthesis_engine.py
+++ b/test/test_synthesis_engine.py
@@ -582,26 +582,20 @@ def test_pre_process(self):
         )
 
     def test_replace_phoneme_length(self):
-        result = self.synthesis_engine.replace_phoneme_length(
-            accent_phrases=deepcopy(self.accent_phrases_hello_hiho), style_id=1
-        )
-
-        # yukarin_sに渡される値の検証
+        # Inputs
+        hello_hiho = deepcopy(self.accent_phrases_hello_hiho)
+        # Outputs & Indirect Outputs（yukarin_sに渡される値）
+        result = self.synthesis_engine.replace_phoneme_length(hello_hiho, style_id=1)
         yukarin_s_args = self.yukarin_s_mock.call_args[1]
         list_length = yukarin_s_args["length"]
         phoneme_list = yukarin_s_args["phoneme_list"]
-        self.assertEqual(list_length, 20)
-        self.assertEqual(list_length, len(phoneme_list))
+        style_id = yukarin_s_args["style_id"]
+        # Expects
+        true_list_length = 20
+        true_style_id = 1
         true_phoneme_list_1 = [0, 23, 30, 4, 28, 21, 10, 21, 42, 7]
         true_phoneme_list_2 = [0, 19, 21, 19, 30, 12, 14, 35, 6, 0]
         true_phoneme_list = true_phoneme_list_1 + true_phoneme_list_2
-        numpy.testing.assert_array_equal(
-            phoneme_list,
-            numpy.array(true_phoneme_list, dtype=numpy.int64),
-        )
-        self.assertEqual(yukarin_s_args["style_id"], 1)
-
-        # flatten_morasを使わずに愚直にaccent_phrasesにデータを反映させてみる
         true_result = deepcopy(self.accent_phrases_hello_hiho)
         index = 1
 
@@ -619,24 +613,31 @@ def result_value(i: int):
             if accent_phrase.pause_mora is not None:
                 accent_phrase.pause_mora.vowel_length = result_value(index)
                 index += 1
-
+        # Tests
+        self.assertEqual(list_length, true_list_length)
+        self.assertEqual(list_length, len(phoneme_list))
+        self.assertEqual(style_id, true_style_id)
+        numpy.testing.assert_array_equal(
+            phoneme_list,
+            numpy.array(true_phoneme_list, dtype=numpy.int64),
+        )
         self.assertEqual(result, true_result)
 
     def test_replace_mora_pitch(self):
         # 空のリストでエラーを吐かないか
-        empty_accent_phrases = []
-        self.assertEqual(
-            self.synthesis_engine.replace_mora_pitch(
-                accent_phrases=empty_accent_phrases, style_id=1
-            ),
-            [],
-        )
-
-        result = self.synthesis_engine.replace_mora_pitch(
-            accent_phrases=deepcopy(self.accent_phrases_hello_hiho), style_id=1
-        )
+        # Inputs
+        phrases = []
+        # Outputs
+        result = self.synthesis_engine.replace_mora_pitch(phrases, style_id=1)
+        # Expects
+        true_result = []
+        # Tests
+        self.assertEqual(result, true_result)
 
-        # yukarin_saに渡される値の検証
+        # Inputs
+        hello_hiho = deepcopy(self.accent_phrases_hello_hiho)
+        # Outputs & Indirect Outputs（yukarin_saに渡される値）
+        result = self.synthesis_engine.replace_mora_pitch(hello_hiho, 1)
         yukarin_sa_args = self.yukarin_sa_mock.call_args[1]
         list_length = yukarin_sa_args["length"]
         vowel_phoneme_list = yukarin_sa_args["vowel_phoneme_list"][0]
@@ -645,37 +646,14 @@ def test_replace_mora_pitch(self):
         end_accent_list = yukarin_sa_args["end_accent_list"][0]
         start_accent_phrase_list = yukarin_sa_args["start_accent_phrase_list"][0]
         end_accent_phrase_list = yukarin_sa_args["end_accent_phrase_list"][0]
-        self.assertEqual(list_length, 12)
-        self.assertEqual(list_length, len(vowel_phoneme_list))
-        self.assertEqual(list_length, len(consonant_phoneme_list))
-        self.assertEqual(list_length, len(start_accent_list))
-        self.assertEqual(list_length, len(end_accent_list))
-        self.assertEqual(list_length, len(start_accent_phrase_list))
-        self.assertEqual(list_length, len(end_accent_phrase_list))
-        self.assertEqual(yukarin_sa_args["style_id"], 1)
-
-        numpy.testing.assert_array_equal(
-            vowel_phoneme_list,
-            numpy.array([0, 30, 4, 21, 21, 7, 0, 21, 30, 14, 6, 0]),
-        )
-        numpy.testing.assert_array_equal(
-            consonant_phoneme_list,
-            numpy.array([-1, 23, -1, 28, 10, 42, -1, 19, 19, 12, 35, -1]),
-        )
-        numpy.testing.assert_array_equal(
-            start_accent_list, numpy.array([0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0])
-        )
-        numpy.testing.assert_array_equal(
-            end_accent_list, numpy.array([0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0])
-        )
-        numpy.testing.assert_array_equal(
-            start_accent_phrase_list, numpy.array([0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0])
-        )
-        numpy.testing.assert_array_equal(
-            end_accent_phrase_list, numpy.array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0])
-        )
-
-        # flatten_morasを使わずに愚直にaccent_phrasesにデータを反映させてみる
+        style_id = yukarin_sa_args["style_id"]
+        # Expects
+        true_vowels = numpy.array([0, 30, 4, 21, 21, 7, 0, 21, 30, 14, 6, 0])
+        true_consonants = numpy.array([-1, 23, -1, 28, 10, 42, -1, 19, 19, 12, 35, -1])
+        true_accent_starts = numpy.array([0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0])
+        true_accent_ends = numpy.array([0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0])
+        true_phrase_starts = numpy.array([0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0])
+        true_phrase_ends = numpy.array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0])
         true_result = deepcopy(self.accent_phrases_hello_hiho)
         index = 1
 
@@ -703,5 +681,19 @@ def result_value(i: int):
             if accent_phrase.pause_mora is not None:
                 accent_phrase.pause_mora.pitch = result_value(index)
                 index += 1
-
+        # Tests
+        self.assertEqual(list_length, 12)
+        self.assertEqual(list_length, len(vowel_phoneme_list))
+        self.assertEqual(list_length, len(consonant_phoneme_list))
+        self.assertEqual(list_length, len(start_accent_list))
+        self.assertEqual(list_length, len(end_accent_list))
+        self.assertEqual(list_length, len(start_accent_phrase_list))
+        self.assertEqual(list_length, len(end_accent_phrase_list))
+        self.assertEqual(style_id, 1)
+        numpy.testing.assert_array_equal(vowel_phoneme_list, true_vowels)
+        numpy.testing.assert_array_equal(consonant_phoneme_list, true_consonants)
+        numpy.testing.assert_array_equal(start_accent_list, true_accent_starts)
+        numpy.testing.assert_array_equal(end_accent_list, true_accent_ends)
+        numpy.testing.assert_array_equal(start_accent_phrase_list, true_phrase_starts)
+        numpy.testing.assert_array_equal(end_accent_phrase_list, true_phrase_ends)
         self.assertEqual(result, true_result)

From 39853ea9f14e6a7e65225dfda50fc24aa82bd689 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sun, 31 Dec 2023 11:53:32 +0900
Subject: [PATCH 092/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E3=82=B3?=
 =?UTF-8?q?=E3=82=A2=E7=94=9F=E6=88=90=E3=81=AE=E3=83=A2=E3=82=B8=E3=83=A5?=
 =?UTF-8?q?=E3=83=BC=E3=83=AB=E7=A7=BB=E6=A4=8D=20(#947)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: コア生成モジュールへの移植

* fix: lint

* fix: コア取得リネーム

* 単数形へ

変更漏れ

---------

Co-authored-by: Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp>
---
 run.py                                                |  9 +++------
 test/e2e/conftest.py                                  |  5 +++--
 voicevox_engine/cancellable_engine.py                 |  5 +++--
 .../make_tts_engines.py => core_initializer.py}       | 11 +++++------
 voicevox_engine/tts_pipeline/__init__.py              |  2 --
 voicevox_engine/tts_pipeline/tts_engine.py            |  2 +-
 6 files changed, 15 insertions(+), 19 deletions(-)
 rename voicevox_engine/{tts_pipeline/make_tts_engines.py => core_initializer.py} (95%)

diff --git a/run.py b/run.py
index 6f3de1168..3d8e202cb 100644
--- a/run.py
+++ b/run.py
@@ -30,6 +30,7 @@
 from voicevox_engine import __version__
 from voicevox_engine.cancellable_engine import CancellableEngine
 from voicevox_engine.core_adapter import CoreAdapter
+from voicevox_engine.core_initializer import initialize_cores
 from voicevox_engine.engine_manifest import EngineManifestLoader
 from voicevox_engine.engine_manifest.EngineManifest import EngineManifest
 from voicevox_engine.library_manager import LibraryManager
@@ -67,11 +68,7 @@
     Setting,
     SettingLoader,
 )
-from voicevox_engine.tts_pipeline import (
-    TTSEngine,
-    make_cores,
-    make_tts_engines_from_cores,
-)
+from voicevox_engine.tts_pipeline import TTSEngine, make_tts_engines_from_cores
 from voicevox_engine.tts_pipeline.kana_converter import create_kana, parse_kana
 from voicevox_engine.user_dict import (
     apply_word,
@@ -1490,7 +1487,7 @@ def main() -> None:
     cpu_num_threads: int | None = args.cpu_num_threads
     load_all_models: bool = args.load_all_models
 
-    cores = make_cores(
+    cores = initialize_cores(
         use_gpu=use_gpu,
         voicelib_dirs=voicelib_dirs,
         voicevox_dir=voicevox_dir,
diff --git a/test/e2e/conftest.py b/test/e2e/conftest.py
index ddae68de8..dbc3cb25f 100644
--- a/test/e2e/conftest.py
+++ b/test/e2e/conftest.py
@@ -6,9 +6,10 @@
 from syrupy.assertion import SnapshotAssertion
 from syrupy.extensions.json import JSONSnapshotExtension
 
+from voicevox_engine.core_initializer import initialize_cores
 from voicevox_engine.preset import PresetManager
 from voicevox_engine.setting import SettingLoader
-from voicevox_engine.tts_pipeline import make_cores, make_tts_engines_from_cores
+from voicevox_engine.tts_pipeline import make_tts_engines_from_cores
 from voicevox_engine.utility.core_version_utility import get_latest_core_version
 
 
@@ -27,7 +28,7 @@ def snapshot_json(snapshot: SnapshotAssertion):
 
 @pytest.fixture(scope="session")
 def app_params():
-    cores = make_cores(use_gpu=False)
+    cores = initialize_cores(use_gpu=False)
     synthesis_engines = make_tts_engines_from_cores(cores)
     latest_core_version = get_latest_core_version(
         versions=list(synthesis_engines.keys())
diff --git a/voicevox_engine/cancellable_engine.py b/voicevox_engine/cancellable_engine.py
index 4fc3006e2..6cf8d0bd0 100644
--- a/voicevox_engine/cancellable_engine.py
+++ b/voicevox_engine/cancellable_engine.py
@@ -16,8 +16,9 @@
 # FIXME: remove FastAPI dependency
 from fastapi import HTTPException, Request
 
+from .core_initializer import initialize_cores
 from .model import AudioQuery
-from .tts_pipeline import make_cores, make_tts_engines_from_cores
+from .tts_pipeline import make_tts_engines_from_cores
 from .utility import get_latest_core_version
 
 
@@ -219,7 +220,7 @@ def start_synthesis_subprocess(
         メインプロセスと通信するためのPipe
     """
 
-    cores = make_cores(
+    cores = initialize_cores(
         use_gpu=use_gpu,
         voicelib_dirs=voicelib_dirs,
         voicevox_dir=voicevox_dir,
diff --git a/voicevox_engine/tts_pipeline/make_tts_engines.py b/voicevox_engine/core_initializer.py
similarity index 95%
rename from voicevox_engine/tts_pipeline/make_tts_engines.py
rename to voicevox_engine/core_initializer.py
index bb1857468..4832ee805 100644
--- a/voicevox_engine/tts_pipeline/make_tts_engines.py
+++ b/voicevox_engine/core_initializer.py
@@ -3,15 +3,14 @@
 from pathlib import Path
 from typing import List, Optional
 
-from ..core_wrapper import CoreWrapper, load_runtime_lib
-from ..utility import engine_root, get_save_dir
-from .tts_engine import CoreAdapter
+from .core_wrapper import CoreWrapper, load_runtime_lib
+from .tts_pipeline.tts_engine import CoreAdapter
+from .utility import engine_root, get_save_dir
 
 MOCK_VER = "0.0.0"
 
 
-# FIXME: ファイル名を変えるか関数の場所を変える
-def make_cores(
+def initialize_cores(
     use_gpu: bool,
     voicelib_dirs: Optional[List[Path]] = None,
     voicevox_dir: Optional[Path] = None,
@@ -130,7 +129,7 @@ def load_core_library(core_dir: Path, suppress_error: bool = False) -> None:
 
     else:
         # モック追加
-        from ..dev.core import MockCoreWrapper
+        from .dev.core import MockCoreWrapper
 
         if MOCK_VER not in cores:
             print("Info: Loading mock.")
diff --git a/voicevox_engine/tts_pipeline/__init__.py b/voicevox_engine/tts_pipeline/__init__.py
index 110256fc4..d6e066469 100644
--- a/voicevox_engine/tts_pipeline/__init__.py
+++ b/voicevox_engine/tts_pipeline/__init__.py
@@ -1,11 +1,9 @@
 from ..core_wrapper import CoreWrapper, load_runtime_lib
-from .make_tts_engines import make_cores
 from .tts_engine import TTSEngine, make_tts_engines_from_cores
 
 __all__ = [
     "CoreWrapper",
     "load_runtime_lib",
-    "make_cores",
     "make_tts_engines_from_cores",
     "TTSEngine",
 ]
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index a49ef505b..6de529e5a 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -463,7 +463,7 @@ def synthesis(
 
 def make_tts_engines_from_cores(cores: dict[str, CoreAdapter]) -> dict[str, TTSEngine]:
     """コア一覧からTTSエンジン一覧を生成する"""
-    # FIXME: `MOCK_VER` を循環 import 無しに `make_cores()` 関連モジュールから import する
+    # FIXME: `MOCK_VER` を循環 import 無しに `initialize_cores()` 関連モジュールから import する
     MOCK_VER = "0.0.0"
     tts_engines: dict[str, TTSEngine] = {}
     for ver, core in cores.items():

From e6f3824ea317eb88ab104283385a8811952822bb Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sun, 31 Dec 2023 15:24:38 +0900
Subject: [PATCH 093/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E3=82=A2?=
 =?UTF-8?q?=E3=82=AF=E3=82=BB=E3=83=B3=E3=83=88=E5=8F=A5=E7=94=9F=E6=88=90?=
 =?UTF-8?q?=E3=81=AE=E7=B0=A1=E7=95=A5=E5=8C=96=20(#957)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

refactor: アクセント句生成の簡略化
---
 test/test_mock_synthesis_engine.py | 102 +++++++----------------------
 1 file changed, 22 insertions(+), 80 deletions(-)

diff --git a/test/test_mock_synthesis_engine.py b/test/test_mock_synthesis_engine.py
index 4bdf7d24e..c37d4b638 100644
--- a/test/test_mock_synthesis_engine.py
+++ b/test/test_mock_synthesis_engine.py
@@ -6,6 +6,18 @@
 from voicevox_engine.tts_pipeline.kana_converter import create_kana
 
 
+def _gen_mora(text: str, consonant: str | None, vowel: str) -> Mora:
+    """モーラ (length=0, pitch=0) を生成する"""
+    return Mora(
+        text=text,
+        consonant=consonant,
+        consonant_length=0.0 if consonant else None,
+        vowel=vowel,
+        vowel_length=0.0,
+        pitch=0.0,
+    )
+
+
 class TestMockTTSEngine(TestCase):
     def setUp(self):
         super().setUp()
@@ -13,91 +25,21 @@ def setUp(self):
         self.accent_phrases_hello_hiho = [
             AccentPhrase(
                 moras=[
-                    Mora(
-                        text="コ",
-                        consonant="k",
-                        consonant_length=0.0,
-                        vowel="o",
-                        vowel_length=0.0,
-                        pitch=0.0,
-                    ),
-                    Mora(
-                        text="ン",
-                        consonant=None,
-                        consonant_length=None,
-                        vowel="N",
-                        vowel_length=0.0,
-                        pitch=0.0,
-                    ),
-                    Mora(
-                        text="ニ",
-                        consonant="n",
-                        consonant_length=0.0,
-                        vowel="i",
-                        vowel_length=0.0,
-                        pitch=0.0,
-                    ),
-                    Mora(
-                        text="チ",
-                        consonant="ch",
-                        consonant_length=0.0,
-                        vowel="i",
-                        vowel_length=0.0,
-                        pitch=0.0,
-                    ),
-                    Mora(
-                        text="ワ",
-                        consonant="w",
-                        consonant_length=0.0,
-                        vowel="a",
-                        vowel_length=0.0,
-                        pitch=0.0,
-                    ),
+                    _gen_mora("コ", "k", "o"),
+                    _gen_mora("ン", None, "N"),
+                    _gen_mora("ニ", "n", "i"),
+                    _gen_mora("チ", "ch", "i"),
+                    _gen_mora("ワ", "w", "a"),
                 ],
                 accent=5,
-                pause_mora=Mora(
-                    text="、",
-                    consonant=None,
-                    consonant_length=None,
-                    vowel="pau",
-                    vowel_length=0.0,
-                    pitch=0.0,
-                ),
+                pause_mora=_gen_mora("、", None, "pau"),
             ),
             AccentPhrase(
                 moras=[
-                    Mora(
-                        text="ヒ",
-                        consonant="h",
-                        consonant_length=0.0,
-                        vowel="i",
-                        vowel_length=0.0,
-                        pitch=0.0,
-                    ),
-                    Mora(
-                        text="ホ",
-                        consonant="h",
-                        consonant_length=0.0,
-                        vowel="o",
-                        vowel_length=0.0,
-                        pitch=0.0,
-                    ),
-                    Mora(
-                        text="デ",
-                        consonant="d",
-                        consonant_length=0.0,
-                        vowel="e",
-                        vowel_length=0.0,
-                        pitch=0.0,
-                    ),
-                    Mora(
-                        text="ス",
-                        consonant="s",
-                        consonant_length=0.0,
-                        vowel="U",
-                        vowel_length=0.0,
-                        pitch=0.0,
-                    ),
+                    _gen_mora("ヒ", "h", "i"),
+                    _gen_mora("ホ", "h", "o"),
+                    _gen_mora("デ", "d", "e"),
+                    _gen_mora("ス", "s", "U"),
                 ],
                 accent=1,
                 pause_mora=None,

From ec963625b84ba7ac52ab300b8614bbd9cc507390 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sun, 31 Dec 2023 18:00:42 +0900
Subject: [PATCH 094/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`MockTTSEngine`?=
 =?UTF-8?q?=20=E7=B4=94=E3=83=A2=E3=83=83=E3=82=AF=E5=8C=96=20(#948)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: `MockTTSEngine` リネーム

* refactor: `TTSEngineLight` 初期化の引数内部化

* fix: リネーム
---
 build_util/make_docs.py                      |  2 +-
 test/test_mock_synthesis_engine.py           |  3 +--
 voicevox_engine/dev/synthesis_engine/mock.py | 12 +++++-------
 voicevox_engine/tts_pipeline/tts_engine.py   |  2 +-
 4 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/build_util/make_docs.py b/build_util/make_docs.py
index fb0ac36a0..1f3541c7a 100644
--- a/build_util/make_docs.py
+++ b/build_util/make_docs.py
@@ -39,7 +39,7 @@ def generate_api_docs_html(schema: str) -> str:
     mock_core = MockCoreWrapper()
     # FastAPI の機能を用いて OpenAPI schema を生成する
     app = run.generate_app(
-        synthesis_engines={"mock": MockTTSEngine(mock_core)},
+        synthesis_engines={"mock": MockTTSEngine()},
         cores={"mock": CoreAdapter(mock_core)},
         latest_core_version="mock",
         setting_loader=SettingLoader(USER_SETTING_PATH),
diff --git a/test/test_mock_synthesis_engine.py b/test/test_mock_synthesis_engine.py
index c37d4b638..7a7ad7f73 100644
--- a/test/test_mock_synthesis_engine.py
+++ b/test/test_mock_synthesis_engine.py
@@ -1,6 +1,5 @@
 from unittest import TestCase
 
-from voicevox_engine.dev.core import MockCoreWrapper
 from voicevox_engine.dev.synthesis_engine import MockTTSEngine
 from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
 from voicevox_engine.tts_pipeline.kana_converter import create_kana
@@ -45,7 +44,7 @@ def setUp(self):
                 pause_mora=None,
             ),
         ]
-        self.engine = MockTTSEngine(MockCoreWrapper())
+        self.engine = MockTTSEngine()
 
     def test_replace_phoneme_length(self):
         """`.replace_phoneme_length()` がエラー無く生成をおこなう"""
diff --git a/voicevox_engine/dev/synthesis_engine/mock.py b/voicevox_engine/dev/synthesis_engine/mock.py
index b6e1a3044..ea1a0d204 100644
--- a/voicevox_engine/dev/synthesis_engine/mock.py
+++ b/voicevox_engine/dev/synthesis_engine/mock.py
@@ -6,19 +6,17 @@
 from pyopenjtalk import tts
 from soxr import resample
 
-from ...core_wrapper import CoreWrapper
 from ...model import AudioQuery
 from ...tts_pipeline import TTSEngine
 from ...tts_pipeline.tts_engine import to_flatten_moras
+from ..core.mock import MockCoreWrapper
 
 
 class MockTTSEngine(TTSEngine):
-    """
-    TTSEngine [Mock]
-    """
+    """製品版コア無しに音声合成が可能なモック版TTSEngine"""
 
-    def __init__(self, core: CoreWrapper):
-        super().__init__(core)
+    def __init__(self):
+        super().__init__(MockCoreWrapper())
 
     def synthesis(
         self,
@@ -26,7 +24,7 @@ def synthesis(
         style_id: int,
         enable_interrogative_upspeak: bool = True,
     ) -> np.ndarray:
-        """音声合成用のクエリに含まれる読み仮名に基づいてOpenJTalkで音声波形を生成する (Mock)"""
+        """音声合成用のクエリに含まれる読み仮名に基づいてOpenJTalkで音声波形を生成する"""
         # モーフィング時などに同一参照のqueryで複数回呼ばれる可能性があるので、元の引数のqueryに破壊的変更を行わない
         query = copy.deepcopy(query)
 
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index 6de529e5a..0880faef2 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -470,7 +470,7 @@ def make_tts_engines_from_cores(cores: dict[str, CoreAdapter]) -> dict[str, TTSE
         if ver == MOCK_VER:
             from ..dev.synthesis_engine import MockTTSEngine
 
-            tts_engines[ver] = MockTTSEngine(core.core)
+            tts_engines[ver] = MockTTSEngine()
         else:
             tts_engines[ver] = TTSEngine(core.core)
     return tts_engines

From b18ef84d6bff7d0b6a3977aeb57fcbb0398127e2 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sun, 31 Dec 2023 18:17:07 +0900
Subject: [PATCH 095/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`synthesis=5Fengi?=
 =?UTF-8?q?nes`=20=E9=96=A2=E9=80=A3=E5=90=8D=E3=81=AE=E3=83=AA=E3=83=8D?=
 =?UTF-8?q?=E3=83=BC=E3=83=A0=20(#949)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: synthesis_engine 関連名のリネーム

* refactor: モックモジュール名のリネーム

* fix: lint

* fix: モジュール名参照

* refactor: テストモジュールリネーム

* to core_initializer

---------

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
Co-authored-by: Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp>
---
 README.md                                      |  2 +-
 build_util/make_docs.py                        |  5 ++---
 run.py                                         | 18 ++++++++----------
 test/e2e/conftest.py                           |  8 +++-----
 ...hesis_engine.py => test_mock_tts_engine.py} |  2 +-
 test/test_mora_to_text.py                      |  1 -
 ..._synthesis_engine.py => test_tts_engine.py} | 10 ++++------
 ..._engine_base.py => test_tts_engine_base.py} |  6 +++---
 voicevox_engine/cancellable_engine.py          | 18 ++++++++----------
 .../__init__.py                                |  0
 .../{synthesis_engine => tts_engine}/mock.py   |  0
 voicevox_engine/morphing.py                    |  2 +-
 voicevox_engine/tts_pipeline/tts_engine.py     |  2 +-
 13 files changed, 32 insertions(+), 42 deletions(-)
 rename test/{test_mock_synthesis_engine.py => test_mock_tts_engine.py} (97%)
 rename test/{test_synthesis_engine.py => test_tts_engine.py} (98%)
 rename test/{test_synthesis_engine_base.py => test_tts_engine_base.py} (97%)
 rename voicevox_engine/dev/{synthesis_engine => tts_engine}/__init__.py (100%)
 rename voicevox_engine/dev/{synthesis_engine => tts_engine}/mock.py (100%)

diff --git a/README.md b/README.md
index 0c54b115a..a4022d39f 100644
--- a/README.md
+++ b/README.md
@@ -619,7 +619,7 @@ VOICEVOX ENGINE リポジトリを fork し、一部の機能を改造するの
 キャラクター情報は`speaker_info`ディレクトリ内のファイルで管理されています。
 ダミーのアイコンなどが用意されているので適宜変更してください。
 
-音声合成は`voicevox_engine/synthesis_engine/synthesis_engine.py`で行われています。
+音声合成は`voicevox_engine/tts_pipeline/tts_engine.py`で行われています。
 VOICEVOX API での音声合成は、エンジン側で音声合成用のクエリ `AudioQuery` の初期値を作成してユーザーに返し、ユーザーが必要に応じてクエリを編集したあと、エンジンがクエリに従って音声合成することで実現しています。
 クエリ作成は`/audio_query`エンドポイントで、音声合成は`/synthesis`エンドポイントで行っており、最低この２つに対応すれば VOICEVOX API に準拠したことになります。
 
diff --git a/build_util/make_docs.py b/build_util/make_docs.py
index 1f3541c7a..bde98cead 100644
--- a/build_util/make_docs.py
+++ b/build_util/make_docs.py
@@ -2,7 +2,7 @@
 from pathlib import Path
 
 from voicevox_engine.dev.core import MockCoreWrapper
-from voicevox_engine.dev.synthesis_engine.mock import MockTTSEngine
+from voicevox_engine.dev.tts_engine.mock import MockTTSEngine
 from voicevox_engine.preset import PresetManager
 from voicevox_engine.setting import USER_SETTING_PATH, SettingLoader
 from voicevox_engine.tts_pipeline.tts_engine import CoreAdapter
@@ -33,13 +33,12 @@ def generate_api_docs_html(schema: str) -> str:
 
 
 if __name__ == "__main__":
-
     import run
 
     mock_core = MockCoreWrapper()
     # FastAPI の機能を用いて OpenAPI schema を生成する
     app = run.generate_app(
-        synthesis_engines={"mock": MockTTSEngine()},
+        tts_engines={"mock": MockTTSEngine()},
         cores={"mock": CoreAdapter(mock_core)},
         latest_core_version="mock",
         setting_loader=SettingLoader(USER_SETTING_PATH),
diff --git a/run.py b/run.py
index 3d8e202cb..509dbc11f 100644
--- a/run.py
+++ b/run.py
@@ -142,7 +142,7 @@ def set_output_log_utf8() -> None:
 
 
 def generate_app(
-    synthesis_engines: Dict[str, TTSEngine],
+    tts_engines: Dict[str, TTSEngine],
     cores: Dict[str, CoreAdapter],
     latest_core_version: str,
     setting_loader: SettingLoader,
@@ -247,9 +247,9 @@ def apply_user_dict():
 
     def get_engine(core_version: Optional[str]) -> TTSEngine:
         if core_version is None:
-            return synthesis_engines[latest_core_version]
-        if core_version in synthesis_engines:
-            return synthesis_engines[core_version]
+            return tts_engines[latest_core_version]
+        if core_version in tts_engines:
+            return tts_engines[core_version]
         raise HTTPException(status_code=422, detail="不明なバージョンです")
 
     def get_core(core_version: Optional[str]) -> CoreAdapter:
@@ -1496,11 +1496,9 @@ def main() -> None:
         enable_mock=enable_mock,
         load_all_models=load_all_models,
     )
-    synthesis_engines = make_tts_engines_from_cores(cores)
-    assert len(synthesis_engines) != 0, "音声合成エンジンがありません。"
-    latest_core_version = get_latest_core_version(
-        versions=list(synthesis_engines.keys())
-    )
+    tts_engines = make_tts_engines_from_cores(cores)
+    assert len(tts_engines) != 0, "音声合成エンジンがありません。"
+    latest_core_version = get_latest_core_version(versions=list(tts_engines.keys()))
 
     # Cancellable Engine
     enable_cancellable_synthesis: bool = args.enable_cancellable_synthesis
@@ -1560,7 +1558,7 @@ def main() -> None:
 
     uvicorn.run(
         generate_app(
-            synthesis_engines,
+            tts_engines,
             cores,
             latest_core_version,
             setting_loader,
diff --git a/test/e2e/conftest.py b/test/e2e/conftest.py
index dbc3cb25f..4e104398b 100644
--- a/test/e2e/conftest.py
+++ b/test/e2e/conftest.py
@@ -29,16 +29,14 @@ def snapshot_json(snapshot: SnapshotAssertion):
 @pytest.fixture(scope="session")
 def app_params():
     cores = initialize_cores(use_gpu=False)
-    synthesis_engines = make_tts_engines_from_cores(cores)
-    latest_core_version = get_latest_core_version(
-        versions=list(synthesis_engines.keys())
-    )
+    tts_engines = make_tts_engines_from_cores(cores)
+    latest_core_version = get_latest_core_version(versions=list(tts_engines.keys()))
     setting_loader = SettingLoader(Path("./not_exist.yaml"))
     preset_manager = PresetManager(  # FIXME: impl MockPresetManager
         preset_path=Path("./presets.yaml"),
     )
     return {
-        "synthesis_engines": synthesis_engines,
+        "tts_engines": tts_engines,
         "cores": cores,
         "latest_core_version": latest_core_version,
         "setting_loader": setting_loader,
diff --git a/test/test_mock_synthesis_engine.py b/test/test_mock_tts_engine.py
similarity index 97%
rename from test/test_mock_synthesis_engine.py
rename to test/test_mock_tts_engine.py
index 7a7ad7f73..a7d0cf2d3 100644
--- a/test/test_mock_synthesis_engine.py
+++ b/test/test_mock_tts_engine.py
@@ -1,6 +1,6 @@
 from unittest import TestCase
 
-from voicevox_engine.dev.synthesis_engine import MockTTSEngine
+from voicevox_engine.dev.tts_engine import MockTTSEngine
 from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
 from voicevox_engine.tts_pipeline.kana_converter import create_kana
 
diff --git a/test/test_mora_to_text.py b/test/test_mora_to_text.py
index c2b122d8e..8fcc6bd63 100644
--- a/test/test_mora_to_text.py
+++ b/test/test_mora_to_text.py
@@ -1,6 +1,5 @@
 from unittest import TestCase
 
-# TODO: import from voicevox_engine.synthesis_engine.mora
 from voicevox_engine.tts_pipeline.text_analyzer import mora_to_text
 
 
diff --git a/test/test_synthesis_engine.py b/test/test_tts_engine.py
similarity index 98%
rename from test/test_synthesis_engine.py
rename to test/test_tts_engine.py
index 92eacb14e..bbb790b09 100644
--- a/test/test_synthesis_engine.py
+++ b/test/test_tts_engine.py
@@ -8,8 +8,6 @@
 from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
 from voicevox_engine.tts_pipeline import TTSEngine
 from voicevox_engine.tts_pipeline.acoustic_feature_extractor import Phoneme
-
-# TODO: import from voicevox_engine.synthesis_engine.mora
 from voicevox_engine.tts_pipeline.tts_engine import (
     apply_intonation_scale,
     apply_output_sampling_rate,
@@ -499,7 +497,7 @@ def setUp(self):
         self.yukarin_s_mock = core.yukarin_s_forward
         self.yukarin_sa_mock = core.yukarin_sa_forward
         self.decode_mock = core.decode_forward
-        self.synthesis_engine = TTSEngine(core=core)
+        self.tts_engine = TTSEngine(core=core)
 
     def test_to_flatten_moras(self):
         flatten_moras = to_flatten_moras(self.accent_phrases_hello_hiho)
@@ -585,7 +583,7 @@ def test_replace_phoneme_length(self):
         # Inputs
         hello_hiho = deepcopy(self.accent_phrases_hello_hiho)
         # Outputs & Indirect Outputs（yukarin_sに渡される値）
-        result = self.synthesis_engine.replace_phoneme_length(hello_hiho, style_id=1)
+        result = self.tts_engine.replace_phoneme_length(hello_hiho, style_id=1)
         yukarin_s_args = self.yukarin_s_mock.call_args[1]
         list_length = yukarin_s_args["length"]
         phoneme_list = yukarin_s_args["phoneme_list"]
@@ -628,7 +626,7 @@ def test_replace_mora_pitch(self):
         # Inputs
         phrases = []
         # Outputs
-        result = self.synthesis_engine.replace_mora_pitch(phrases, style_id=1)
+        result = self.tts_engine.replace_mora_pitch(phrases, style_id=1)
         # Expects
         true_result = []
         # Tests
@@ -637,7 +635,7 @@ def test_replace_mora_pitch(self):
         # Inputs
         hello_hiho = deepcopy(self.accent_phrases_hello_hiho)
         # Outputs & Indirect Outputs（yukarin_saに渡される値）
-        result = self.synthesis_engine.replace_mora_pitch(hello_hiho, 1)
+        result = self.tts_engine.replace_mora_pitch(hello_hiho, style_id=1)
         yukarin_sa_args = self.yukarin_sa_mock.call_args[1]
         list_length = yukarin_sa_args["length"]
         vowel_phoneme_list = yukarin_sa_args["vowel_phoneme_list"][0]
diff --git a/test/test_synthesis_engine_base.py b/test/test_tts_engine_base.py
similarity index 97%
rename from test/test_synthesis_engine_base.py
rename to test/test_tts_engine_base.py
index 730879434..2d8b566b5 100644
--- a/test/test_synthesis_engine_base.py
+++ b/test/test_tts_engine_base.py
@@ -94,7 +94,7 @@ def koreha_arimasuka_base_expected():
 class TestTTSEngineBase(TestCase):
     def setUp(self):
         super().setUp()
-        self.synthesis_engine = TTSEngine(core=MockCoreWrapper())
+        self.tts_engine = TTSEngine(core=MockCoreWrapper())
 
     def create_synthesis_test_base(
         self,
@@ -105,7 +105,7 @@ def create_synthesis_test_base(
         """音声合成時に疑問文モーラ処理を行っているかどうかを検証
         (https://github.com/VOICEVOX/voicevox_engine/issues/272#issuecomment-1022610866)
         """
-        inputs = self.synthesis_engine.create_accent_phrases(text, 1)
+        inputs = self.tts_engine.create_accent_phrases(text, 1)
         outputs = apply_interrogative_upspeak(inputs, enable_interrogative_upspeak)
         self.assertEqual(expected, outputs, f"case(text:{text})")
 
@@ -116,7 +116,7 @@ def test_create_accent_phrases(self):
         text = "これはありますか？"
         expected = koreha_arimasuka_base_expected()
         expected[-1].is_interrogative = True
-        actual = self.synthesis_engine.create_accent_phrases(text, 1)
+        actual = self.tts_engine.create_accent_phrases(text, 1)
         self.assertEqual(expected, actual, f"case(text:{text})")
 
     def test_upspeak_voiced_last_mora(self):
diff --git a/voicevox_engine/cancellable_engine.py b/voicevox_engine/cancellable_engine.py
index 6cf8d0bd0..d133b831d 100644
--- a/voicevox_engine/cancellable_engine.py
+++ b/voicevox_engine/cancellable_engine.py
@@ -29,7 +29,7 @@ class CancellableEngine:
     （オリジナルと比べ引数が増えているので注意）
 
     パラメータ use_gpu, voicelib_dirs, voicevox_dir,
-    runtime_dirs, cpu_num_threads, enable_mock は、 make_synthesis_engines を参照
+    runtime_dirs, cpu_num_threads, enable_mock は、 core_initializer を参照
 
     Attributes
     ----------
@@ -212,7 +212,7 @@ def start_synthesis_subprocess(
     pickle化の関係でグローバルに書いている
 
     引数 use_gpu, voicelib_dirs, voicevox_dir,
-    runtime_dirs, cpu_num_threads, enable_mock は、 make_synthesis_engines を参照
+    runtime_dirs, cpu_num_threads, enable_mock は、 core_initializer を参照
 
     Parameters
     ----------
@@ -228,19 +228,17 @@ def start_synthesis_subprocess(
         cpu_num_threads=cpu_num_threads,
         enable_mock=enable_mock,
     )
-    synthesis_engines = make_tts_engines_from_cores(cores)
+    tts_engines = make_tts_engines_from_cores(cores)
 
-    assert len(synthesis_engines) != 0, "音声合成エンジンがありません。"
-    latest_core_version = get_latest_core_version(
-        versions=list(synthesis_engines.keys())
-    )
+    assert len(tts_engines) != 0, "音声合成エンジンがありません。"
+    latest_core_version = get_latest_core_version(versions=list(tts_engines.keys()))
     while True:
         try:
             query, style_id, core_version = sub_proc_con.recv()
             if core_version is None:
-                _engine = synthesis_engines[latest_core_version]
-            elif core_version in synthesis_engines:
-                _engine = synthesis_engines[core_version]
+                _engine = tts_engines[latest_core_version]
+            elif core_version in tts_engines:
+                _engine = tts_engines[core_version]
             else:
                 # バージョンが見つからないエラー
                 sub_proc_con.send("")
diff --git a/voicevox_engine/dev/synthesis_engine/__init__.py b/voicevox_engine/dev/tts_engine/__init__.py
similarity index 100%
rename from voicevox_engine/dev/synthesis_engine/__init__.py
rename to voicevox_engine/dev/tts_engine/__init__.py
diff --git a/voicevox_engine/dev/synthesis_engine/mock.py b/voicevox_engine/dev/tts_engine/mock.py
similarity index 100%
rename from voicevox_engine/dev/synthesis_engine/mock.py
rename to voicevox_engine/dev/tts_engine/mock.py
diff --git a/voicevox_engine/morphing.py b/voicevox_engine/morphing.py
index 31be8cfa9..064908751 100644
--- a/voicevox_engine/morphing.py
+++ b/voicevox_engine/morphing.py
@@ -198,7 +198,7 @@ def synthesis_morphing(
         morph_param.frame_period,
     )
 
-    # TODO: synthesis_engine.py でのリサンプル処理と共通化する
+    # TODO: tts_engine.py でのリサンプル処理と共通化する
     if output_fs != morph_param.fs:
         y_h = resample(y_h, morph_param.fs, output_fs)
 
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index 0880faef2..a3804dc32 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -468,7 +468,7 @@ def make_tts_engines_from_cores(cores: dict[str, CoreAdapter]) -> dict[str, TTSE
     tts_engines: dict[str, TTSEngine] = {}
     for ver, core in cores.items():
         if ver == MOCK_VER:
-            from ..dev.synthesis_engine import MockTTSEngine
+            from ..dev.tts_engine import MockTTSEngine
 
             tts_engines[ver] = MockTTSEngine()
         else:

From c44c516dd9ac8c386965c7b6602251bd77ff2543 Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Mon, 1 Jan 2024 20:39:22 +0900
Subject: [PATCH 096/177] =?UTF-8?q?POST=20/user=5Fdict=5Fword=E3=81=AE?=
 =?UTF-8?q?=E3=83=86=E3=82=B9=E3=83=88=E8=BF=BD=E5=8A=A0=20(#945)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

post_user_dict_wordのテスト追加
---
 test/e2e/test_user_dict_word.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 test/e2e/test_user_dict_word.py

diff --git a/test/e2e/test_user_dict_word.py b/test/e2e/test_user_dict_word.py
new file mode 100644
index 000000000..a89ddb7da
--- /dev/null
+++ b/test/e2e/test_user_dict_word.py
@@ -0,0 +1,24 @@
+"""
+ユーザー辞書の言葉のAPIのテスト
+"""
+
+
+from fastapi.testclient import TestClient
+
+
+def test_post_user_dict_word(client: TestClient):
+    true_params = {
+        "surface": "test",
+        "pronunciation": "テスト",
+        "accent_type": 1,
+        "word_type": "PROPER_NOUN",
+        "priority": 5,
+    }
+
+    # 正常系
+    response = client.post("/user_dict_word", params=true_params)
+    assert response.status_code == 200
+
+    # 範囲外の優先度はエラー
+    response = client.post("/user_dict_word", params={**true_params, "priority": 100})
+    assert response.status_code == 422

From 45d8a3e6df695394b6411999b5e12830e10f2618 Mon Sep 17 00:00:00 2001
From: sabonerune <102559104+sabonerune@users.noreply.github.com>
Date: Mon, 1 Jan 2024 21:47:25 +0900
Subject: [PATCH 097/177] =?UTF-8?q?TYP:=20=E5=85=A8=E3=81=A6=E3=81=AE?=
 =?UTF-8?q?=E3=83=95=E3=82=A1=E3=82=A4=E3=83=AB=E3=82=92mypy=E3=81=AE?=
 =?UTF-8?q?=E5=AF=BE=E8=B1=A1=E3=81=AB=E3=81=99=E3=82=8B=20(#959)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* TYP: 全てのファイルをmypyの対象にする

* FIX: コメント追加

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>

* FIX: test_user_dict_word.py

---------

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 build_util/check_release_build.py  |  1 +
 build_util/merge_update_infos.py   | 16 ++++++++------
 pyproject.toml                     |  2 +-
 test/e2e/conftest.py               |  2 +-
 test/e2e/test_disable_api.py       |  4 ++--
 test/e2e/test_user_dict_word.py    |  4 ++--
 test/e2e/test_validate_speakers.py |  2 +-
 test/e2e/test_validate_version.py  |  2 +-
 test/test_kana_converter.py        |  7 +++---
 test/test_library_manager.py       |  2 +-
 test/test_preset.py                | 12 +++++------
 test/test_text_analyzer.py         |  2 +-
 test/test_tts_engine.py            | 34 ++++++++++++++++--------------
 test/test_tts_engine_base.py       |  2 +-
 test/test_user_dict.py             |  3 +--
 test/test_user_dict_model.py       | 19 ++++++++++++++++-
 voicevox_engine/library_manager.py |  9 ++++----
 17 files changed, 72 insertions(+), 51 deletions(-)

diff --git a/build_util/check_release_build.py b/build_util/check_release_build.py
index 008f25548..ae14ff1af 100644
--- a/build_util/check_release_build.py
+++ b/build_util/check_release_build.py
@@ -56,6 +56,7 @@ def test_release_build(dist_dir: Path, skip_run_process: bool) -> None:
 
     if not skip_run_process:
         # プロセスが稼働中であることを確認
+        assert process is not None
         assert process.poll() is None
 
         # 停止
diff --git a/build_util/merge_update_infos.py b/build_util/merge_update_infos.py
index d3a5bb3a8..7c1ea5784 100644
--- a/build_util/merge_update_infos.py
+++ b/build_util/merge_update_infos.py
@@ -6,7 +6,6 @@
 import json
 from collections import OrderedDict
 from pathlib import Path
-from typing import Dict, List, Union
 
 
 def merge_json_string(src: str, dst: str) -> str:
@@ -23,8 +22,10 @@ def merge_json_string(src: str, dst: str) -> str:
     >>> merge_json_string(src, dst)
     '[{"version": "1"}]'
     """
-    src_json: List[Dict[str, Union[str, List[str]]]] = json.loads(src)
-    dst_json: List[Dict[str, Union[str, List[str]]]] = json.loads(dst)
+    # FIXME: バリデーションする
+    # TODO: `str | list[str]`だけど`str`が来るとエラーになるのでならないようにしたい
+    src_json: list[dict[str, str | list[str]]] = json.loads(src)
+    dst_json: list[dict[str, str | list[str]]] = json.loads(dst)
 
     for src_item in src_json:
         for dst_item in dst_json:
@@ -33,10 +34,13 @@ def merge_json_string(src: str, dst: str) -> str:
                     if key == "version":
                         continue
 
+                    src_value = src_item[key]
+                    dst_value = dst_item[key]
+                    assert isinstance(src_value, list)
+                    assert isinstance(dst_value, list)
+
                     # 異なるものがあった場合だけ後ろに付け足す
-                    src_item[key] = list(
-                        OrderedDict.fromkeys(src_item[key] + dst_item[key])
-                    )
+                    src_item[key] = list(OrderedDict.fromkeys(src_value + dst_value))
 
     return json.dumps(src_json)
 
diff --git a/pyproject.toml b/pyproject.toml
index 3cf991f3f..eb421d3e3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,7 +16,7 @@ py_version = "py311"
 isort_known_first_party = ["voicevox_engine"]
 isort_known_third_party = ["numpy"]
 [[tool.pysen.lint.mypy_targets]]
-paths = ["run.py", "voicevox_engine/"] # TODO: paths = ["."]
+paths = ["."]
 
 [tool.black] # automatically generated by pysen
 # pysen ignores and overwrites any modifications
diff --git a/test/e2e/conftest.py b/test/e2e/conftest.py
index 4e104398b..7981d1457 100644
--- a/test/e2e/conftest.py
+++ b/test/e2e/conftest.py
@@ -14,7 +14,7 @@
 
 
 @pytest.fixture
-def snapshot_json(snapshot: SnapshotAssertion):
+def snapshot_json(snapshot: SnapshotAssertion) -> SnapshotAssertion:
     """
     syrupyでJSONをsnapshotするためのfixture。
 
diff --git a/test/e2e/test_disable_api.py b/test/e2e/test_disable_api.py
index e60fa5470..27dd292f8 100644
--- a/test/e2e/test_disable_api.py
+++ b/test/e2e/test_disable_api.py
@@ -13,7 +13,7 @@ def _assert_request_and_response_403(
     client: TestClient,
     method: Literal["post", "get", "put", "delete"],
     path: str,
-):
+) -> None:
     if method == "post":
         response = client.post(path)
     elif method == "get":
@@ -28,7 +28,7 @@ def _assert_request_and_response_403(
     assert response.status_code == 403, f"{method} {path} が403を返しませんでした"
 
 
-def test_disable_mutable_api(app_params: dict):
+def test_disable_mutable_api(app_params: dict) -> None:
     """エンジンの静的なデータを変更するAPIを無効化するテスト"""
     client = TestClient(generate_app(**app_params, disable_mutable_api=True))
 
diff --git a/test/e2e/test_user_dict_word.py b/test/e2e/test_user_dict_word.py
index a89ddb7da..87c4cd1f1 100644
--- a/test/e2e/test_user_dict_word.py
+++ b/test/e2e/test_user_dict_word.py
@@ -6,8 +6,8 @@
 from fastapi.testclient import TestClient
 
 
-def test_post_user_dict_word(client: TestClient):
-    true_params = {
+def test_post_user_dict_word(client: TestClient) -> None:
+    true_params: dict[str, str | int] = {
         "surface": "test",
         "pronunciation": "テスト",
         "accent_type": 1,
diff --git a/test/e2e/test_validate_speakers.py b/test/e2e/test_validate_speakers.py
index c212e5fa0..b93c5f6a6 100644
--- a/test/e2e/test_validate_speakers.py
+++ b/test/e2e/test_validate_speakers.py
@@ -4,7 +4,7 @@
 
 def test_fetch_speakers_success(
     client: TestClient, snapshot_json: JSONSnapshotExtension
-):
+) -> None:
     response = client.get("/speakers")
     assert response.status_code == 200
     assert snapshot_json == response.json()
diff --git a/test/e2e/test_validate_version.py b/test/e2e/test_validate_version.py
index b431a4a44..be1ca649d 100644
--- a/test/e2e/test_validate_version.py
+++ b/test/e2e/test_validate_version.py
@@ -3,7 +3,7 @@
 from voicevox_engine import __version__
 
 
-def test_fetch_version_success(client: TestClient):
+def test_fetch_version_success(client: TestClient) -> None:
     response = client.get("/version")
     assert response.status_code == 200
     assert response.json() == __version__
diff --git a/test/test_kana_converter.py b/test/test_kana_converter.py
index 02fa6bdb5..94309d7c7 100644
--- a/test/test_kana_converter.py
+++ b/test/test_kana_converter.py
@@ -1,4 +1,3 @@
-from typing import List
 from unittest import TestCase
 
 from voicevox_engine.model import AccentPhrase, Mora, ParseKanaError, ParseKanaErrorCode
@@ -6,7 +5,7 @@
 from voicevox_engine.tts_pipeline.kana_converter import create_kana
 
 
-def parse_kana(text: str) -> List[AccentPhrase]:
+def parse_kana(text: str) -> list[AccentPhrase]:
     accent_phrases = kana_converter.parse_kana(text)
     return accent_phrases
 
@@ -57,7 +56,7 @@ def test_roundtrip(self):
             self.assertEqual(create_kana(parse_kana(text)), text)
 
     def _accent_phrase_marks_base(
-        self, text: str, expected_accent_phrases: List[AccentPhrase]
+        self, text: str, expected_accent_phrases: list[AccentPhrase]
     ) -> None:
         accent_phrases = kana_converter.parse_kana(text)
         self.assertEqual(expected_accent_phrases, accent_phrases)
@@ -530,7 +529,7 @@ def a_pause_a_question_pause_a_question_a_question_mark_accent_phrases():
 
 
 class TestParseKanaException(TestCase):
-    def _assert_error_code(self, kana: str, code: ParseKanaErrorCode):
+    def _assert_error_code(self, kana: str, code: ParseKanaErrorCode) -> None:
         with self.assertRaises(ParseKanaError) as err:
             parse_kana(kana)
         self.assertEqual(err.exception.errcode, code)
diff --git a/test/test_library_manager.py b/test/test_library_manager.py
index 9924a0703..51fe8bc50 100644
--- a/test/test_library_manager.py
+++ b/test/test_library_manager.py
@@ -44,7 +44,7 @@ def tearDown(self):
         self.library_file.close()
         self.library_filename.unlink()
 
-    def create_vvlib_without_manifest(self, filename: str):
+    def create_vvlib_without_manifest(self, filename: str) -> None:
         with ZipFile(filename, "w") as zf_out, ZipFile(
             self.library_filename, "r"
         ) as zf_in:
diff --git a/test/test_preset.py b/test/test_preset.py
index 3a162829c..bbbc608b8 100644
--- a/test/test_preset.py
+++ b/test/test_preset.py
@@ -164,8 +164,8 @@ def test_add_preset_write_failure(self):
             }
         )
         preset_manager.load_presets()
-        preset_manager.load_presets = lambda: []
-        preset_manager.preset_path = ""
+        preset_manager.load_presets = lambda: []  # type:ignore[method-assign]
+        preset_manager.preset_path = ""  # type: ignore[assignment]
         with self.assertRaises(PresetError, msg="プリセットの設定ファイルに書き込み失敗しました"):
             preset_manager.add_preset(preset)
         self.assertEqual(len(preset_manager.presets), 2)
@@ -259,8 +259,8 @@ def test_update_preset_write_failure(self):
             }
         )
         preset_manager.load_presets()
-        preset_manager.load_presets = lambda: []
-        preset_manager.preset_path = ""
+        preset_manager.load_presets = lambda: []  # type:ignore[method-assign]
+        preset_manager.preset_path = ""  # type: ignore[assignment]
         with self.assertRaises(PresetError, msg="プリセットの設定ファイルに書き込み失敗しました"):
             preset_manager.update_preset(preset)
         self.assertEqual(len(preset_manager.presets), 2)
@@ -295,8 +295,8 @@ def test_delete_preset_write_failure(self):
         copyfile(Path("test/presets-test-1.yaml"), temp_path)
         preset_manager = PresetManager(preset_path=temp_path)
         preset_manager.load_presets()
-        preset_manager.load_presets = lambda: []
-        preset_manager.preset_path = ""
+        preset_manager.load_presets = lambda: []  # type:ignore[method-assign]
+        preset_manager.preset_path = ""  # type: ignore[assignment]
         with self.assertRaises(PresetError, msg="プリセットの設定ファイルに書き込み失敗しました"):
             preset_manager.delete_preset(1)
         self.assertEqual(len(preset_manager.presets), 2)
diff --git a/test/test_text_analyzer.py b/test/test_text_analyzer.py
index 4de4afad6..a07912f42 100644
--- a/test/test_text_analyzer.py
+++ b/test/test_text_analyzer.py
@@ -31,7 +31,7 @@ def contexts_to_feature(contexts: dict[str, str]) -> str:
 OjtContainer = MoraLabel | AccentPhraseLabel | BreathGroupLabel | UtteranceLabel
 
 
-def features(ojt_container: OjtContainer):
+def features(ojt_container: OjtContainer) -> list[str]:
     """コンテナインスタンスに直接的・間接的に含まれる全ての feature を返す"""
     return [contexts_to_feature(p.contexts) for p in ojt_container.labels]
 
diff --git a/test/test_tts_engine.py b/test/test_tts_engine.py
index bbb790b09..099e13631 100644
--- a/test/test_tts_engine.py
+++ b/test/test_tts_engine.py
@@ -35,7 +35,7 @@ def is_same_phoneme(p1: Phoneme, p2: Phoneme) -> bool:
 
 
 def is_same_ojt_phoneme_list(
-    p1s: list[Phoneme | None], p2s: list[Phoneme | None]
+    p1s: list[Phoneme | None] | list[Phoneme], p2s: list[Phoneme | None] | list[Phoneme]
 ) -> bool:
     """2つのPhonemeリストで全要素ペアが同じ `.phoneme` を持つ"""
     if len(p1s) != len(p2s):
@@ -55,7 +55,9 @@ def is_same_ojt_phoneme_list(
     return True
 
 
-def yukarin_s_mock(length: int, phoneme_list: numpy.ndarray, style_id: numpy.ndarray):
+def yukarin_s_mock(
+    length: int, phoneme_list: numpy.ndarray, style_id: numpy.ndarray
+) -> numpy.ndarray:
     result = []
     # mockとしての適当な処理、特に意味はない
     for i in range(length):
@@ -72,7 +74,7 @@ def yukarin_sa_mock(
     start_accent_phrase_list: numpy.ndarray,
     end_accent_phrase_list: numpy.ndarray,
     style_id: numpy.ndarray,
-):
+) -> numpy.ndarray:
     result = []
     # mockとしての適当な処理、特に意味はない
     for i in range(length):
@@ -99,7 +101,7 @@ def decode_mock(
     f0: numpy.ndarray,
     phoneme: numpy.ndarray,
     style_id: Union[numpy.ndarray, int],
-):
+) -> numpy.ndarray:
     result = []
     # mockとしての適当な処理、特に意味はない
     for i in range(length):
@@ -140,7 +142,7 @@ def _gen_query(
     volumeScale: float = 1.0,
     outputSamplingRate: int = 24000,
     outputStereo: bool = False,
-):
+) -> AudioQuery:
     """Generate AudioQuery with default meaningless arguments for test simplicity."""
     accent_phrases = [] if accent_phrases is None else accent_phrases
     return AudioQuery(
@@ -356,12 +358,12 @@ def test_count_frame_per_unit():
     ]
 
     # Expects
-    #                        Pre k  o  N pau h  i  h  O Pst
-    true_frame_per_phoneme = [2, 2, 4, 4, 2, 2, 4, 4, 2, 6]
-    true_frame_per_phoneme = numpy.array(true_frame_per_phoneme, dtype=numpy.int32)
-    #                    Pre ko  N pau hi hO Pst
-    true_frame_per_mora = [2, 6, 4, 2, 6, 6, 6]
-    true_frame_per_mora = numpy.array(true_frame_per_mora, dtype=numpy.int32)
+    #                             Pre k  o  N pau h  i  h  O Pst
+    true_frame_per_phoneme_list = [2, 2, 4, 4, 2, 2, 4, 4, 2, 6]
+    true_frame_per_phoneme = numpy.array(true_frame_per_phoneme_list, dtype=numpy.int32)
+    #                         Pre ko  N pau hi hO Pst
+    true_frame_per_mora_list = [2, 6, 4, 2, 6, 6, 6]
+    true_frame_per_mora = numpy.array(true_frame_per_mora_list, dtype=numpy.int32)
 
     # Outputs
     frame_per_phoneme, frame_per_mora = count_frame_per_unit(moras)
@@ -497,7 +499,7 @@ def setUp(self):
         self.yukarin_s_mock = core.yukarin_s_forward
         self.yukarin_sa_mock = core.yukarin_sa_forward
         self.decode_mock = core.decode_forward
-        self.tts_engine = TTSEngine(core=core)
+        self.tts_engine = TTSEngine(core=core)  # type: ignore[arg-type]
 
     def test_to_flatten_moras(self):
         flatten_moras = to_flatten_moras(self.accent_phrases_hello_hiho)
@@ -597,7 +599,7 @@ def test_replace_phoneme_length(self):
         true_result = deepcopy(self.accent_phrases_hello_hiho)
         index = 1
 
-        def result_value(i: int):
+        def result_value(i: int) -> float:
             return float(phoneme_list[i] * 0.5 + 1)
 
         for accent_phrase in true_result:
@@ -624,11 +626,11 @@ def result_value(i: int):
     def test_replace_mora_pitch(self):
         # 空のリストでエラーを吐かないか
         # Inputs
-        phrases = []
+        phrases: list = []
         # Outputs
         result = self.tts_engine.replace_mora_pitch(phrases, style_id=1)
         # Expects
-        true_result = []
+        true_result: list = []
         # Tests
         self.assertEqual(result, true_result)
 
@@ -655,7 +657,7 @@ def test_replace_mora_pitch(self):
         true_result = deepcopy(self.accent_phrases_hello_hiho)
         index = 1
 
-        def result_value(i: int):
+        def result_value(i: int) -> float:
             # unvoiced_mora_phoneme_listのPhoneme ID版
             unvoiced_mora_phoneme_id_list = [
                 Phoneme(p).phoneme_id for p in unvoiced_mora_phoneme_list
diff --git a/test/test_tts_engine_base.py b/test/test_tts_engine_base.py
index 2d8b566b5..168c4e5f1 100644
--- a/test/test_tts_engine_base.py
+++ b/test/test_tts_engine_base.py
@@ -101,7 +101,7 @@ def create_synthesis_test_base(
         text: str,
         expected: list[AccentPhrase],
         enable_interrogative_upspeak: bool,
-    ):
+    ) -> None:
         """音声合成時に疑問文モーラ処理を行っているかどうかを検証
         (https://github.com/VOICEVOX/voicevox_engine/issues/272#issuecomment-1022610866)
         """
diff --git a/test/test_user_dict.py b/test/test_user_dict.py
index 6014ac721..f14b5432f 100644
--- a/test/test_user_dict.py
+++ b/test/test_user_dict.py
@@ -2,7 +2,6 @@
 from copy import deepcopy
 from pathlib import Path
 from tempfile import TemporaryDirectory
-from typing import Dict
 from unittest import TestCase
 
 from fastapi import HTTPException
@@ -61,7 +60,7 @@
 )
 
 
-def get_new_word(user_dict: Dict[str, UserDictWord]):
+def get_new_word(user_dict: dict[str, UserDictWord]) -> UserDictWord:
     assert len(user_dict) == 2 or (
         len(user_dict) == 1 and "aab7dda2-0d97-43c8-8cb7-3f440dab9b4e" not in user_dict
     )
diff --git a/test/test_user_dict_model.py b/test/test_user_dict_model.py
index 823310157..159909233 100644
--- a/test/test_user_dict_model.py
+++ b/test/test_user_dict_model.py
@@ -1,4 +1,5 @@
 from copy import deepcopy
+from typing import TypedDict
 from unittest import TestCase
 
 from pydantic import ValidationError
@@ -7,9 +8,25 @@
 from voicevox_engine.tts_pipeline.kana_converter import parse_kana
 
 
+class TestModel(TypedDict):
+    surface: str
+    priority: int
+    part_of_speech: str
+    part_of_speech_detail_1: str
+    part_of_speech_detail_2: str
+    part_of_speech_detail_3: str
+    inflectional_type: str
+    inflectional_form: str
+    stem: str
+    yomi: str
+    pronunciation: str
+    accent_type: int
+    accent_associative_rule: str
+
+
 class TestUserDictWords(TestCase):
     def setUp(self):
-        self.test_model = {
+        self.test_model: TestModel = {
             "surface": "テスト",
             "priority": 0,
             "part_of_speech": "名詞",
diff --git a/voicevox_engine/library_manager.py b/voicevox_engine/library_manager.py
index c7f6c6502..3ae43d887 100644
--- a/voicevox_engine/library_manager.py
+++ b/voicevox_engine/library_manager.py
@@ -3,9 +3,8 @@
 import os
 import shutil
 import zipfile
-from io import BytesIO
 from pathlib import Path
-from typing import Dict
+from typing import BinaryIO
 
 from fastapi import HTTPException
 from pydantic import ValidationError
@@ -90,7 +89,7 @@ def downloadable_libraries(self) -> list[DownloadableLibraryInfo]:
                 ]
             return list(map(DownloadableLibraryInfo.parse_obj, libraries))
 
-    def installed_libraries(self) -> Dict[str, InstalledLibraryInfo]:
+    def installed_libraries(self) -> dict[str, InstalledLibraryInfo]:
         """
         インストール済み音声ライブラリの情報を取得
         Returns
@@ -98,7 +97,7 @@ def installed_libraries(self) -> Dict[str, InstalledLibraryInfo]:
         library : Dict[str, InstalledLibraryInfo]
             インストール済みライブラリの情報
         """
-        library: Dict[str, InstalledLibraryInfo] = {}
+        library: dict[str, InstalledLibraryInfo] = {}
         for library_dir in self.library_root_dir.iterdir():
             if library_dir.is_dir():
                 # ライブラリ情報の取得 from `library_root_dir / f"{library_uuid}" / "metas.json"`
@@ -109,7 +108,7 @@ def installed_libraries(self) -> Dict[str, InstalledLibraryInfo]:
                 library[library_uuid] = InstalledLibraryInfo(**info, uninstallable=True)
         return library
 
-    def install_library(self, library_id: str, file: BytesIO) -> Path:
+    def install_library(self, library_id: str, file: BinaryIO) -> Path:
         """
         音声ライブラリ (`.vvlib`) のインストール
         Parameters

From c1291f8b61abd49da9a0362c9d05b6538cd91302 Mon Sep 17 00:00:00 2001
From: sabonerune <102559104+sabonerune@users.noreply.github.com>
Date: Mon, 1 Jan 2024 21:51:47 +0900
Subject: [PATCH 098/177] =?UTF-8?q?MAINT:=20=E6=9C=AA=E4=BD=BF=E7=94=A8?=
 =?UTF-8?q?=E3=81=AE=E4=BE=9D=E5=AD=98=E3=83=91=E3=83=83=E3=82=B1=E3=83=BC?=
 =?UTF-8?q?=E3=82=B8=E3=82=92=E5=89=8A=E9=99=A4=20(#962)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 poetry.lock              | 125 ++++++++++++++++++---------------------
 pyproject.toml           |   5 --
 requirements-dev.txt     |   6 +-
 requirements-license.txt |   7 +--
 requirements-test.txt    |   3 +-
 requirements.txt         |   7 +--
 6 files changed, 62 insertions(+), 91 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index c231a3ea8..c36d1a59d 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,16 +1,5 @@
 # This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
 
-[[package]]
-name = "aiofiles"
-version = "0.7.0"
-description = "File support for asyncio."
-optional = false
-python-versions = ">=3.6,<4.0"
-files = [
-    {file = "aiofiles-0.7.0-py3-none-any.whl", hash = "sha256:c67a6823b5f23fcab0a2595a289cec7d8c863ffcb4322fb8cd6b90400aedfdbc"},
-    {file = "aiofiles-0.7.0.tar.gz", hash = "sha256:a1c4fc9b2ff81568c83e21392a82f344ea9d23da906e4f6a52662764545e19d4"},
-]
-
 [[package]]
 name = "altgraph"
 version = "0.17.4"
@@ -56,16 +45,6 @@ files = [
 [package.extras]
 tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"]
 
-[[package]]
-name = "atomicwrites"
-version = "1.4.1"
-description = "Atomic file writes."
-optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
-files = [
-    {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"},
-]
-
 [[package]]
 name = "attrs"
 version = "23.1.0"
@@ -535,52 +514,62 @@ test-randomorder = ["pytest-randomly"]
 
 [[package]]
 name = "cython"
-version = "0.29.36"
-description = "The Cython compiler for writing C extensions for the Python language."
-optional = false
-python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
-files = [
-    {file = "Cython-0.29.36-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1ea33c1c57f331f5653baa1313e445fbe80d1da56dd9a42c8611037887897b9d"},
-    {file = "Cython-0.29.36-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:2fe34615c13ace29e77bf9d21c26188d23eff7ad8b3e248da70404e5f5436b95"},
-    {file = "Cython-0.29.36-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ae75eac4f13cbbcb50b2097470dcea570182446a3ebd0f7e95dd425c2017a2d7"},
-    {file = "Cython-0.29.36-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:847d07fc02978c4433d01b4f5ee489b75fd42fd32ccf9cc4b5fd887e8cffe822"},
-    {file = "Cython-0.29.36-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:7cb44aeaf6c5c25bd6a7562ece4eadf50d606fc9b5f624fa95bd0281e8bf0a97"},
-    {file = "Cython-0.29.36-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:28fb10aabd56a2e4d399273b48e106abe5a0d271728fd5eed3d36e7171000045"},
-    {file = "Cython-0.29.36-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:86b7a13c6b23ab6471d40a320f573fbc8a4e39833947eebed96661145dc34771"},
-    {file = "Cython-0.29.36-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:19ccf7fc527cf556e2e6a3dfeffcadfbcabd24a59a988289117795dfed8a25ad"},
-    {file = "Cython-0.29.36-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:74bddfc7dc8958526b2018d3adc1aa6dc9cf2a24095c972e5ad06758c360b261"},
-    {file = "Cython-0.29.36-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:6c4d7e36fe0211e394adffd296382b435ac22762d14f2fe45c506c230f91cf2d"},
-    {file = "Cython-0.29.36-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:0bca6a7504e8cfc63a4d3c7c9b9a04e5d05501942a6c8cee177363b61a32c2d4"},
-    {file = "Cython-0.29.36-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:17c74f80b06e2fa8ffc8acd41925f4f9922da8a219cd25c6901beab2f7c56cc5"},
-    {file = "Cython-0.29.36-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:25ff471a459aad82146973b0b8c177175ab896051080713d3035ad4418739f66"},
-    {file = "Cython-0.29.36-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a9738f23d00d99481797b155ad58f8fc1c72096926ea2554b8ccc46e1d356c27"},
-    {file = "Cython-0.29.36-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:af2f333f08c4c279f3480532341bf70ec8010bcbc7d8a6daa5ca0bf4513af295"},
-    {file = "Cython-0.29.36-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:cd77cedbcc13cb67aef39b8615fd50a67fc42b0c6defea6fc0a21e19d3a062ec"},
-    {file = "Cython-0.29.36-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:50d506d73a46c4a522ef9fdafcbf7a827ba13907b18ff58f61a8fa0887d0bd8d"},
-    {file = "Cython-0.29.36-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:6a571d7c7b52ee12d73bc65b4855779c069545da3bac26bec06a1389ad17ade5"},
-    {file = "Cython-0.29.36-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a216b2801c7d9c3babe0a10cc25da3bc92494d7047d1f732d3c47b0cceaf0941"},
-    {file = "Cython-0.29.36-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:68abee3be27f21c9642a07a93f8333d491f4c52bc70068e42f51685df9ac1a57"},
-    {file = "Cython-0.29.36-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:1ef90023da8a9bf84cf16f06186db0906d2ce52a09f751e2cb9d3da9d54eae46"},
-    {file = "Cython-0.29.36-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:9deef0761e8c798043dbb728a1c6df97b26e5edc65b8d6c7608b3c07af3eb722"},
-    {file = "Cython-0.29.36-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:69af2365de2343b4e5a61c567e7611ddf2575ae6f6e5c01968f7d4f2747324eb"},
-    {file = "Cython-0.29.36-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:fdf377b0f6e9325b73ad88933136023184afdc795caeeaaf3dca13494cffd15e"},
-    {file = "Cython-0.29.36-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1ff2cc5518558c598028ae8d9a43401e0e734b74b6e598156b005328c9da3472"},
-    {file = "Cython-0.29.36-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:7ca921068242cd8b52544870c807fe285c1f248b12df7b6dfae25cc9957b965e"},
-    {file = "Cython-0.29.36-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:6058a6d04e04d790cda530e1ff675e9352359eb4b777920df3cac2b62a9a030f"},
-    {file = "Cython-0.29.36-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:de2045ceae1857e56a72f08e0acfa48c994277a353b7bdab1f097db9f8803f19"},
-    {file = "Cython-0.29.36-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:9f2a4b4587aaef08815410dc20653613ca04a120a2954a92c39e37c6b5fdf6be"},
-    {file = "Cython-0.29.36-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:2edd9f8edca69178d74cbbbc180bc3e848433c9b7dc80374a11a0bb0076c926d"},
-    {file = "Cython-0.29.36-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c6c0aea8491a70f98b7496b5057c9523740e02cec21cd678eef609d2aa6c1257"},
-    {file = "Cython-0.29.36-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:768f65b16d23c630d8829ce1f95520ef1531a9c0489fa872d87c8c3813f65aee"},
-    {file = "Cython-0.29.36-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:568625e8274ee7288ad87b0f615ec36ab446ca9b35e77481ed010027d99c7020"},
-    {file = "Cython-0.29.36-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:bdc0a4cb99f55e6878d4b67a4bfee23823484915cb6b7e9c9dd01002dd3592ea"},
-    {file = "Cython-0.29.36-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:f0df6552be39853b10dfb5a10dbd08f5c49023d6b390d7ce92d4792a8b6e73ee"},
-    {file = "Cython-0.29.36-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:8894db6f5b6479a3c164e0454e13083ebffeaa9a0822668bb2319bdf1b783df1"},
-    {file = "Cython-0.29.36-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:53f93a8c342e9445a8f0cb7039775294f2dbbe5241936573daeaf0afe30397e4"},
-    {file = "Cython-0.29.36-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:ee317f9bcab901a3db39c34ee5a27716f7132e5c0de150125342694d18b30f51"},
-    {file = "Cython-0.29.36-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e4b8269e5a5d127a2191b02b9df3636c0dac73f14f1ff8a831f39cb5197c4f38"},
-    {file = "Cython-0.29.36-py2.py3-none-any.whl", hash = "sha256:95bb13d8be507425d03ebe051f90d4b2a9fdccc64e4f30b35645fdb7542742eb"},
-    {file = "Cython-0.29.36.tar.gz", hash = "sha256:41c0cfd2d754e383c9eeb95effc9aa4ab847d0c9747077ddd7c0dcb68c3bc01f"},
+version = "3.0.7"
+description = "The Cython compiler for writing C extensions in the Python language."
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+files = [
+    {file = "Cython-3.0.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e3c0e19bb41de6be9d8afc85795159ca16296be81a586cd9588be0400d44a855"},
+    {file = "Cython-3.0.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e8bf00ec1dd1d92e9ae74d2e6891f087a939e1dfb40c9c7fa5d8d6a26c94f5a"},
+    {file = "Cython-3.0.7-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cd6ae43ef2e596c9a88dbf2a8895be2e32cc2f5bc3c8ba2e7753b69068fc0b2d"},
+    {file = "Cython-3.0.7-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:f674be92673e87dd8ee7cfe553d5960ec4effc5ab15063b9a5e265a51585a31a"},
+    {file = "Cython-3.0.7-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:861cf254bf5836d47c2aee86aa75dd93d3de00ccd1b077c3c7a2bb22cba358e7"},
+    {file = "Cython-3.0.7-cp310-cp310-win32.whl", hash = "sha256:f6d8ff62ad55dc0393686438eac4b457a916e4d1118a0b550746bb52b4c756cc"},
+    {file = "Cython-3.0.7-cp310-cp310-win_amd64.whl", hash = "sha256:e13abb14843397b76d0472c7d33cd260d5f262ab05cc27ed423317e645e29643"},
+    {file = "Cython-3.0.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c636c9ab92c7838231a1ba769e519d953af8294612f3f772a54d3a5250ff23f"},
+    {file = "Cython-3.0.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22d2a684122dfb531853d57c8c85c1d5d44be709e12466dca99fa6aee7d8054f"},
+    {file = "Cython-3.0.7-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1bdf8a107fdf9e174991aa87a0be7504f60de1ec6bfb1ccfb30e33acac818a0"},
+    {file = "Cython-3.0.7-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3a83e04fde663b84905f3a20213a4333d13a07b79434300704b70dc552761f8b"},
+    {file = "Cython-3.0.7-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e34b4b08d795ccca920fa26b099558f4f1e4e3f794e4ba8d3433c5bc2454d50a"},
+    {file = "Cython-3.0.7-cp311-cp311-win32.whl", hash = "sha256:133057ac45b6fa7fe5d7baada9d3545d09339432f75c0545f556e8c6fecc2932"},
+    {file = "Cython-3.0.7-cp311-cp311-win_amd64.whl", hash = "sha256:b65abca78aa5ebc8675c8480b9a53006f6efea9910ad099cf32c9fb5617ef251"},
+    {file = "Cython-3.0.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23ceac5315fe899c229e874328742154e331fa41337bb03f6f5264636c351c9e"},
+    {file = "Cython-3.0.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ea936cf5931297ba07bce121388c4c6266c1b63a9f4d648ae16c92ff090204b"},
+    {file = "Cython-3.0.7-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9fcd9a18ee3ac7f460e0841954feb495102ffbdbec0e6c78562f3495cda000dd"},
+    {file = "Cython-3.0.7-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7c8d579d13cb81abe704c8b0908d122b81d6e2623265a19c4a6a7377f440debb"},
+    {file = "Cython-3.0.7-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ef5bb0268bfe5992da3ef9292463a5a895ed8700b134ed2c00008d5471b3ba6e"},
+    {file = "Cython-3.0.7-cp312-cp312-win32.whl", hash = "sha256:55f93d3822bc196b37a8bdfa4ec6a35232a399e97f2baa714bd5ed8ea9b0ce68"},
+    {file = "Cython-3.0.7-cp312-cp312-win_amd64.whl", hash = "sha256:f3845c4506e0d207c5e268fb02813928f3a1e135de954a379f165ef0d581da47"},
+    {file = "Cython-3.0.7-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8ad7c2303a338b2c0b6c6c68f101a6768725934538756096cf3388a5c07a7525"},
+    {file = "Cython-3.0.7-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fed25959e4025870fdde5f895fcb126196d22affd4f4fad85a2823e0dddc85b0"},
+    {file = "Cython-3.0.7-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:79868ec74e4907a8a6e63effe13547c6157f196a162920b1de066da5849ffb8e"},
+    {file = "Cython-3.0.7-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:5e3a038332973b12e72236e8884dc99601a840334c2c46cfbbb5851cb94166eb"},
+    {file = "Cython-3.0.7-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:f2602a5c97a3d618b3b847514204ef3349fb414c59e1126c0c2c708d2c5680f8"},
+    {file = "Cython-3.0.7-cp36-cp36m-win32.whl", hash = "sha256:539ad5a21141e6420035cf616bcba48d999bf878839e52692f97fc7e2f16265c"},
+    {file = "Cython-3.0.7-cp36-cp36m-win_amd64.whl", hash = "sha256:848a28ea49166454c3bff927e5a47629eecf1aa755d6fb3290569cba0fc93766"},
+    {file = "Cython-3.0.7-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82f27a0134fc6bb46032ca5f728d8af984f3be94a3cb01cb70ff1224e551b9cf"},
+    {file = "Cython-3.0.7-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:79f20c61114c7948cf1214585066406cef4b54a9b935160980e0b6e70ada3a69"},
+    {file = "Cython-3.0.7-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:34d51709e10ad6213b4bf094af7be7ff82bab43216b3c92a07d05b451deeca79"},
+    {file = "Cython-3.0.7-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:3f02c7240abab48d59f0d5fef7064f18f01a2a204616165fa6367a8abf5a8832"},
+    {file = "Cython-3.0.7-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:225f8bba6428b8d711ca2d6c738d2e3a4667f6a2ae40f8a7a5256f69f6a3600e"},
+    {file = "Cython-3.0.7-cp37-cp37m-win32.whl", hash = "sha256:30eb2d2938b9195e2c82951713429aff3ad1be9f104437d1536a04eb0cb3dc0e"},
+    {file = "Cython-3.0.7-cp37-cp37m-win_amd64.whl", hash = "sha256:167b3f3894dcc697cefefac1d198304fae8eb4d5860a7b8bc2459d572e838470"},
+    {file = "Cython-3.0.7-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c67105f2c6ccf5b3adbcfaecf3c5c9fa8940f9f97955c9ad7d2542151d97d93"},
+    {file = "Cython-3.0.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a1859af761977530df2cd5c36e31d54e8d6708ad2c4656e7125c482364dc216"},
+    {file = "Cython-3.0.7-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:01b94304aab87496e81d1f546e71abf57b430b39be4269df1cd7da9928d70b5b"},
+    {file = "Cython-3.0.7-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:931aade65f77cf59f2a702ac1f549a4836ce221107c740502cbad18d6d8e9511"},
+    {file = "Cython-3.0.7-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:812b193c26553f1f375d4f1c50f805c227b24ed2d595bc9cdaf78c992ecc64a4"},
+    {file = "Cython-3.0.7-cp38-cp38-win32.whl", hash = "sha256:b227643d8a40b68554dc7d37fcd03fc97b4fb0bd2614aeb5f2e07ab244642d36"},
+    {file = "Cython-3.0.7-cp38-cp38-win_amd64.whl", hash = "sha256:0d8a98c7d86ac4d05b251c39faf49423780381aab55fbf2e147f6e006a34a58a"},
+    {file = "Cython-3.0.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:816f5285d596062c7ef22790de7d75354b58d4417a9fc64cba914aeeb900db0b"},
+    {file = "Cython-3.0.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9d0dae6dccd349b8ccf197c10ef2d05c711ca36a649c7eddbab1de2c90b63a1"},
+    {file = "Cython-3.0.7-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:13211b67b29f6ed8e87c137496c73d93aff0330d97940b4fbed72eae37a4a2a0"},
+    {file = "Cython-3.0.7-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b1853bc34ced5ff6473e881fcf6de29da83262552c8f268a0df53b49c2b89e2c"},
+    {file = "Cython-3.0.7-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:51e8164b1270625ff101e95c3c1c234421520c07a0a3a20ded9e9431d98afce7"},
+    {file = "Cython-3.0.7-cp39-cp39-win32.whl", hash = "sha256:45319d2471f4dbf19893ca53785a421107266e18b8cccd2054fce1e3f72a85f1"},
+    {file = "Cython-3.0.7-cp39-cp39-win_amd64.whl", hash = "sha256:612d83fd1eb5aaa5401a755c1f1aafacd9dab404cd350b90d5f404c98b33e4b3"},
+    {file = "Cython-3.0.7-py2.py3-none-any.whl", hash = "sha256:936ec37b261b226d7404eff23a9aad284098338150d42a53d6a9af12b18d3892"},
+    {file = "Cython-3.0.7.tar.gz", hash = "sha256:fb299acf3a578573c190c858d49e0cf9d75f4bc49c3f24c5a63804997ef09213"},
 ]
 
 [[package]]
@@ -2442,4 +2431,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = "~3.11"
-content-hash = "cf49f43d1965899202ef766f3912901c6f8309ec29dc83f4902f568109f5cba4"
+content-hash = "2b9225ae66f4dab6a5785858312ecd2188a972698619418e017152ac2b38f41a"
diff --git a/pyproject.toml b/pyproject.toml
index eb421d3e3..b0d75a9a1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,11 +48,9 @@ numpy = "^1.20.0"
 fastapi = "^0.103.2"
 python-multipart = "^0.0.5"
 uvicorn = "^0.15.0"
-aiofiles = "^0.7.0"
 soundfile = "^0.12.1"
 pyyaml = "^6.0"
 pyworld = "^0.3.0"
-requests = "^2.28.1"
 jinja2 = "^3.1.2"
 pyopenjtalk = { git = "https://github.com/VOICEVOX/pyopenjtalk", rev = "b35fc89fe42948a28e33aed886ea145a51113f88" }
 semver = "^3.0.0"
@@ -60,11 +58,8 @@ platformdirs = "^3.10.0"
 soxr = "^0.3.6"
 
 [tool.poetry.group.dev.dependencies]
-cython = "^0.29.34,>=0.29.33" # NOTE: for Python 3.11
 pyinstaller = "^6.2.0"
 pre-commit = "^2.16.0"
-atomicwrites = "^1.4.0"
-colorama = "^0.4.4"
 poetry = "^1.3.1"
 
 [tool.poetry.group.test.dependencies]
diff --git a/requirements-dev.txt b/requirements-dev.txt
index cfdef2322..c9a7736db 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,8 +1,6 @@
-aiofiles==0.7.0 ; python_version >= "3.11" and python_version < "3.12"
 altgraph==0.17.4 ; python_version >= "3.11" and python_version < "3.12"
 anyio==3.7.1 ; python_version >= "3.11" and python_version < "3.12"
 asgiref==3.7.2 ; python_version >= "3.11" and python_version < "3.12"
-atomicwrites==1.4.1 ; python_version >= "3.11" and python_version < "3.12"
 attrs==23.1.0 ; python_version >= "3.11" and python_version < "3.12"
 build==0.10.0 ; python_version >= "3.11" and python_version < "3.12"
 cachecontrol[filecache]==0.13.1 ; python_version >= "3.11" and python_version < "3.12"
@@ -12,10 +10,10 @@ cfgv==3.4.0 ; python_version >= "3.11" and python_version < "3.12"
 charset-normalizer==3.2.0 ; python_version >= "3.11" and python_version < "3.12"
 cleo==2.0.1 ; python_version >= "3.11" and python_version < "3.12"
 click==8.1.7 ; python_version >= "3.11" and python_version < "3.12"
-colorama==0.4.6 ; python_version >= "3.11" and python_version < "3.12"
+colorama==0.4.6 ; python_version >= "3.11" and python_version < "3.12" and (platform_system == "Windows" or os_name == "nt")
 crashtest==0.4.1 ; python_version >= "3.11" and python_version < "3.12"
 cryptography==41.0.3 ; python_version >= "3.11" and python_version < "3.12" and sys_platform == "linux"
-cython==0.29.36 ; python_version >= "3.11" and python_version < "3.12"
+cython==3.0.7 ; python_version >= "3.11" and python_version < "3.12"
 distlib==0.3.7 ; python_version >= "3.11" and python_version < "3.12"
 dulwich==0.21.5 ; python_version >= "3.11" and python_version < "3.12"
 fastapi==0.103.2 ; python_version >= "3.11" and python_version < "3.12"
diff --git a/requirements-license.txt b/requirements-license.txt
index 2269d3aec..6da0790fc 100644
--- a/requirements-license.txt
+++ b/requirements-license.txt
@@ -1,12 +1,9 @@
-aiofiles==0.7.0 ; python_version >= "3.11" and python_version < "3.12"
 anyio==3.7.1 ; python_version >= "3.11" and python_version < "3.12"
 asgiref==3.7.2 ; python_version >= "3.11" and python_version < "3.12"
-certifi==2023.7.22 ; python_version >= "3.11" and python_version < "3.12"
 cffi==1.15.1 ; python_version >= "3.11" and python_version < "3.12"
-charset-normalizer==3.2.0 ; python_version >= "3.11" and python_version < "3.12"
 click==8.1.7 ; python_version >= "3.11" and python_version < "3.12"
 colorama==0.4.6 ; python_version >= "3.11" and python_version < "3.12" and platform_system == "Windows"
-cython==0.29.36 ; python_version >= "3.11" and python_version < "3.12"
+cython==3.0.7 ; python_version >= "3.11" and python_version < "3.12"
 fastapi==0.103.2 ; python_version >= "3.11" and python_version < "3.12"
 h11==0.14.0 ; python_version >= "3.11" and python_version < "3.12"
 idna==3.4 ; python_version >= "3.11" and python_version < "3.12"
@@ -22,7 +19,6 @@ pyopenjtalk @ git+https://github.com/VOICEVOX/pyopenjtalk@b35fc89fe42948a28e33ae
 python-multipart==0.0.5 ; python_version >= "3.11" and python_version < "3.12"
 pyworld==0.3.4 ; python_version >= "3.11" and python_version < "3.12"
 pyyaml==6.0.1 ; python_version >= "3.11" and python_version < "3.12"
-requests==2.31.0 ; python_version >= "3.11" and python_version < "3.12"
 semver==3.0.1 ; python_version >= "3.11" and python_version < "3.12"
 six==1.16.0 ; python_version >= "3.11" and python_version < "3.12"
 sniffio==1.3.0 ; python_version >= "3.11" and python_version < "3.12"
@@ -31,6 +27,5 @@ soxr==0.3.6 ; python_version >= "3.11" and python_version < "3.12"
 starlette==0.27.0 ; python_version >= "3.11" and python_version < "3.12"
 tqdm==4.66.1 ; python_version >= "3.11" and python_version < "3.12"
 typing-extensions==4.7.1 ; python_version >= "3.11" and python_version < "3.12"
-urllib3==2.0.4 ; python_version >= "3.11" and python_version < "3.12"
 uvicorn==0.15.0 ; python_version >= "3.11" and python_version < "3.12"
 wcwidth==0.2.6 ; python_version >= "3.11" and python_version < "3.12"
diff --git a/requirements-test.txt b/requirements-test.txt
index 771c5769c..46d3f71c5 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,4 +1,3 @@
-aiofiles==0.7.0 ; python_version >= "3.11" and python_version < "3.12"
 anyio==3.7.1 ; python_version >= "3.11" and python_version < "3.12"
 asgiref==3.7.2 ; python_version >= "3.11" and python_version < "3.12"
 attrs==23.1.0 ; python_version >= "3.11" and python_version < "3.12"
@@ -16,7 +15,7 @@ coverage==6.5.0 ; python_version >= "3.11" and python_version < "3.12"
 coveralls==3.3.1 ; python_version >= "3.11" and python_version < "3.12"
 crashtest==0.4.1 ; python_version >= "3.11" and python_version < "3.12"
 cryptography==41.0.3 ; python_version >= "3.11" and python_version < "3.12" and sys_platform == "linux"
-cython==0.29.36 ; python_version >= "3.11" and python_version < "3.12"
+cython==3.0.7 ; python_version >= "3.11" and python_version < "3.12"
 dacite==1.8.1 ; python_version >= "3.11" and python_version < "3.12"
 distlib==0.3.7 ; python_version >= "3.11" and python_version < "3.12"
 docopt==0.6.2 ; python_version >= "3.11" and python_version < "3.12"
diff --git a/requirements.txt b/requirements.txt
index 51add151b..aa124e9c6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,12 +1,9 @@
-aiofiles==0.7.0 ; python_version >= "3.11" and python_version < "3.12"
 anyio==3.7.1 ; python_version >= "3.11" and python_version < "3.12"
 asgiref==3.7.2 ; python_version >= "3.11" and python_version < "3.12"
-certifi==2023.7.22 ; python_version >= "3.11" and python_version < "3.12"
 cffi==1.15.1 ; python_version >= "3.11" and python_version < "3.12"
-charset-normalizer==3.2.0 ; python_version >= "3.11" and python_version < "3.12"
 click==8.1.7 ; python_version >= "3.11" and python_version < "3.12"
 colorama==0.4.6 ; python_version >= "3.11" and python_version < "3.12" and platform_system == "Windows"
-cython==0.29.36 ; python_version >= "3.11" and python_version < "3.12"
+cython==3.0.7 ; python_version >= "3.11" and python_version < "3.12"
 fastapi==0.103.2 ; python_version >= "3.11" and python_version < "3.12"
 h11==0.14.0 ; python_version >= "3.11" and python_version < "3.12"
 idna==3.4 ; python_version >= "3.11" and python_version < "3.12"
@@ -20,7 +17,6 @@ pyopenjtalk @ git+https://github.com/VOICEVOX/pyopenjtalk@b35fc89fe42948a28e33ae
 python-multipart==0.0.5 ; python_version >= "3.11" and python_version < "3.12"
 pyworld==0.3.4 ; python_version >= "3.11" and python_version < "3.12"
 pyyaml==6.0.1 ; python_version >= "3.11" and python_version < "3.12"
-requests==2.31.0 ; python_version >= "3.11" and python_version < "3.12"
 semver==3.0.1 ; python_version >= "3.11" and python_version < "3.12"
 six==1.16.0 ; python_version >= "3.11" and python_version < "3.12"
 sniffio==1.3.0 ; python_version >= "3.11" and python_version < "3.12"
@@ -29,5 +25,4 @@ soxr==0.3.6 ; python_version >= "3.11" and python_version < "3.12"
 starlette==0.27.0 ; python_version >= "3.11" and python_version < "3.12"
 tqdm==4.66.1 ; python_version >= "3.11" and python_version < "3.12"
 typing-extensions==4.7.1 ; python_version >= "3.11" and python_version < "3.12"
-urllib3==2.0.4 ; python_version >= "3.11" and python_version < "3.12"
 uvicorn==0.15.0 ; python_version >= "3.11" and python_version < "3.12"

From 7d661a34efd9a363bf526d11510b4dc688e3df00 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Tue, 2 Jan 2024 10:15:42 +0900
Subject: [PATCH 099/177] =?UTF-8?q?=E5=BB=83=E6=AD=A2:=20`=5F=5Feq=5F=5F`?=
 =?UTF-8?q?=20(#961)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* remove: `__repr__` / `__eq__`

* revert: `__repr__` 削除
---
 voicevox_engine/tts_pipeline/acoustic_feature_extractor.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py b/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
index 1c6506a56..b979809dd 100644
--- a/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
+++ b/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
@@ -25,10 +25,6 @@ def __init__(self, phoneme: str):
 
         self.phoneme = phoneme
 
-    def __eq__(self, o: object) -> bool:
-        """Deprecated."""
-        raise NotImplementedError
-
     @property
     def phoneme_id(self) -> int:
         """音素ID (音素リスト内でのindex) を取得する"""

From 8ab18846a6732301e3ba4ec7168a000bee59d028 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Tue, 2 Jan 2024 21:38:33 +0900
Subject: [PATCH 100/177] =?UTF-8?q?=E8=BF=BD=E5=8A=A0:=20`StyleId`=20?=
 =?UTF-8?q?=E5=9E=8B=20(#965)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add: `StyleId` 型

* fix: named args

* fix: lint
---
 run.py                                     | 26 +++++++++++++++-------
 test/test_mock_tts_engine.py               |  8 +++----
 test/test_tts_engine.py                    |  8 +++----
 test/test_tts_engine_base.py               |  6 ++---
 voicevox_engine/cancellable_engine.py      |  6 ++---
 voicevox_engine/core_adapter.py            | 17 +++++++++-----
 voicevox_engine/dev/tts_engine/mock.py     |  4 ++--
 voicevox_engine/model.py                   |  5 ++++-
 voicevox_engine/morphing.py                |  6 ++---
 voicevox_engine/tts_pipeline/tts_engine.py | 14 ++++++------
 10 files changed, 59 insertions(+), 41 deletions(-)

diff --git a/run.py b/run.py
index 509dbc11f..778a5ba5e 100644
--- a/run.py
+++ b/run.py
@@ -46,6 +46,7 @@
     ParseKanaError,
     Speaker,
     SpeakerInfo,
+    StyleId,
     StyleIdNotFoundError,
     SupportedDevicesInfo,
     UserDictWord,
@@ -90,16 +91,18 @@
 from voicevox_engine.utility.run_utility import decide_boolean_from_env
 
 
-def get_style_id_from_deprecated(style_id: int | None, speaker_id: int | None) -> int:
+def get_style_id_from_deprecated(
+    style_id: int | None, speaker_id: int | None
+) -> StyleId:
     """
     style_idとspeaker_id両方ともNoneかNoneでないかをチェックし、
     どちらか片方しかNoneが存在しなければstyle_idを返す
     """
     if speaker_id is not None and style_id is None:
         warnings.warn("speakerは非推奨です。style_idを利用してください。", stacklevel=1)
-        return speaker_id
+        return StyleId(speaker_id)
     elif style_id is not None and speaker_id is None:
-        return style_id
+        return StyleId(style_id)
     raise HTTPException(
         status_code=400, detail="speakerとstyle_idが両方とも存在しないか、両方とも存在しています。"
     )
@@ -320,7 +323,7 @@ def audio_query_from_preset(
             raise HTTPException(status_code=422, detail="該当するプリセットIDが見つかりません")
 
         accent_phrases = engine.create_accent_phrases(
-            text, style_id=selected_preset.style_id
+            text, style_id=StyleId(selected_preset.style_id)
         )
         return AudioQuery(
             accent_phrases=accent_phrases,
@@ -1009,7 +1012,9 @@ def initialize_style_id(
         実行しなくても他のAPIは使用できますが、初回実行時に時間がかかることがあります。
         """
         core = get_core(core_version)
-        core.initialize_style_id_synthesis(style_id=style_id, skip_reinit=skip_reinit)
+        core.initialize_style_id_synthesis(
+            style_id=StyleId(style_id), skip_reinit=skip_reinit
+        )
         return Response(status_code=204)
 
     @app.get("/is_initialized_style_id", response_model=bool, tags=["その他"])
@@ -1020,7 +1025,8 @@ def is_initialized_style_id(
         """
         指定されたstyle_idのスタイルが初期化されているかどうかを返します。
         """
-        return get_core(core_version).is_initialized_style_id_synthesis(style_id)
+        core = get_core(core_version)
+        return core.is_initialized_style_id_synthesis(StyleId(style_id))
 
     @app.post("/initialize_speaker", status_code=204, tags=["その他"], deprecated=True)
     def initialize_speaker(
@@ -1040,7 +1046,9 @@ def initialize_speaker(
             stacklevel=1,
         )
         return initialize_style_id(
-            style_id=speaker, skip_reinit=skip_reinit, core_version=core_version
+            style_id=StyleId(speaker),
+            skip_reinit=skip_reinit,
+            core_version=core_version,
         )
 
     @app.get(
@@ -1058,7 +1066,9 @@ def is_initialized_speaker(
             "使用しているAPI(/is_initialize_speaker)は非推奨です。/is_initialized_style_idを利用してください。",
             stacklevel=1,
         )
-        return is_initialized_style_id(style_id=speaker, core_version=core_version)
+        return is_initialized_style_id(
+            style_id=StyleId(speaker), core_version=core_version
+        )
 
     @app.get("/user_dict", response_model=dict[str, UserDictWord], tags=["ユーザー辞書"])
     def get_user_dict_words() -> dict[str, UserDictWord]:
diff --git a/test/test_mock_tts_engine.py b/test/test_mock_tts_engine.py
index a7d0cf2d3..0cb1c902e 100644
--- a/test/test_mock_tts_engine.py
+++ b/test/test_mock_tts_engine.py
@@ -1,7 +1,7 @@
 from unittest import TestCase
 
 from voicevox_engine.dev.tts_engine import MockTTSEngine
-from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
+from voicevox_engine.model import AccentPhrase, AudioQuery, Mora, StyleId
 from voicevox_engine.tts_pipeline.kana_converter import create_kana
 
 
@@ -50,14 +50,14 @@ def test_replace_phoneme_length(self):
         """`.replace_phoneme_length()` がエラー無く生成をおこなう"""
         self.engine.replace_phoneme_length(
             accent_phrases=self.accent_phrases_hello_hiho,
-            style_id=0,
+            style_id=StyleId(0),
         )
 
     def test_replace_mora_pitch(self):
         """`.replace_mora_pitch()` がエラー無く生成をおこなう"""
         self.engine.replace_mora_pitch(
             accent_phrases=self.accent_phrases_hello_hiho,
-            style_id=0,
+            style_id=StyleId(0),
         )
 
     def test_synthesis(self):
@@ -75,5 +75,5 @@ def test_synthesis(self):
                 outputStereo=False,
                 kana=create_kana(self.accent_phrases_hello_hiho),
             ),
-            style_id=0,
+            style_id=StyleId(0),
         )
diff --git a/test/test_tts_engine.py b/test/test_tts_engine.py
index 099e13631..e2a3a2a82 100644
--- a/test/test_tts_engine.py
+++ b/test/test_tts_engine.py
@@ -5,7 +5,7 @@
 
 import numpy
 
-from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
+from voicevox_engine.model import AccentPhrase, AudioQuery, Mora, StyleId
 from voicevox_engine.tts_pipeline import TTSEngine
 from voicevox_engine.tts_pipeline.acoustic_feature_extractor import Phoneme
 from voicevox_engine.tts_pipeline.tts_engine import (
@@ -585,7 +585,7 @@ def test_replace_phoneme_length(self):
         # Inputs
         hello_hiho = deepcopy(self.accent_phrases_hello_hiho)
         # Outputs & Indirect Outputs（yukarin_sに渡される値）
-        result = self.tts_engine.replace_phoneme_length(hello_hiho, style_id=1)
+        result = self.tts_engine.replace_phoneme_length(hello_hiho, style_id=StyleId(1))
         yukarin_s_args = self.yukarin_s_mock.call_args[1]
         list_length = yukarin_s_args["length"]
         phoneme_list = yukarin_s_args["phoneme_list"]
@@ -628,7 +628,7 @@ def test_replace_mora_pitch(self):
         # Inputs
         phrases: list = []
         # Outputs
-        result = self.tts_engine.replace_mora_pitch(phrases, style_id=1)
+        result = self.tts_engine.replace_mora_pitch(phrases, style_id=StyleId(1))
         # Expects
         true_result: list = []
         # Tests
@@ -637,7 +637,7 @@ def test_replace_mora_pitch(self):
         # Inputs
         hello_hiho = deepcopy(self.accent_phrases_hello_hiho)
         # Outputs & Indirect Outputs（yukarin_saに渡される値）
-        result = self.tts_engine.replace_mora_pitch(hello_hiho, style_id=1)
+        result = self.tts_engine.replace_mora_pitch(hello_hiho, style_id=StyleId(1))
         yukarin_sa_args = self.yukarin_sa_mock.call_args[1]
         list_length = yukarin_sa_args["length"]
         vowel_phoneme_list = yukarin_sa_args["vowel_phoneme_list"][0]
diff --git a/test/test_tts_engine_base.py b/test/test_tts_engine_base.py
index 168c4e5f1..74e0eee38 100644
--- a/test/test_tts_engine_base.py
+++ b/test/test_tts_engine_base.py
@@ -1,7 +1,7 @@
 from unittest import TestCase
 
 from voicevox_engine.dev.core.mock import MockCoreWrapper
-from voicevox_engine.model import AccentPhrase, Mora
+from voicevox_engine.model import AccentPhrase, Mora, StyleId
 from voicevox_engine.tts_pipeline import TTSEngine
 from voicevox_engine.tts_pipeline.tts_engine import (
     apply_interrogative_upspeak,  # FIXME: この関数を使うテストをTTSEngine用のテストに移動する
@@ -105,7 +105,7 @@ def create_synthesis_test_base(
         """音声合成時に疑問文モーラ処理を行っているかどうかを検証
         (https://github.com/VOICEVOX/voicevox_engine/issues/272#issuecomment-1022610866)
         """
-        inputs = self.tts_engine.create_accent_phrases(text, 1)
+        inputs = self.tts_engine.create_accent_phrases(text, StyleId(1))
         outputs = apply_interrogative_upspeak(inputs, enable_interrogative_upspeak)
         self.assertEqual(expected, outputs, f"case(text:{text})")
 
@@ -116,7 +116,7 @@ def test_create_accent_phrases(self):
         text = "これはありますか？"
         expected = koreha_arimasuka_base_expected()
         expected[-1].is_interrogative = True
-        actual = self.tts_engine.create_accent_phrases(text, 1)
+        actual = self.tts_engine.create_accent_phrases(text, StyleId(1))
         self.assertEqual(expected, actual, f"case(text:{text})")
 
     def test_upspeak_voiced_last_mora(self):
diff --git a/voicevox_engine/cancellable_engine.py b/voicevox_engine/cancellable_engine.py
index d133b831d..bc75fd2f9 100644
--- a/voicevox_engine/cancellable_engine.py
+++ b/voicevox_engine/cancellable_engine.py
@@ -17,7 +17,7 @@
 from fastapi import HTTPException, Request
 
 from .core_initializer import initialize_cores
-from .model import AudioQuery
+from .model import AudioQuery, StyleId
 from .tts_pipeline import make_tts_engines_from_cores
 from .utility import get_latest_core_version
 
@@ -141,7 +141,7 @@ def finalize_con(
     def _synthesis_impl(
         self,
         query: AudioQuery,
-        style_id: int,
+        style_id: StyleId,
         request: Request,
         core_version: str | None,
     ) -> str:
@@ -153,7 +153,7 @@ def _synthesis_impl(
         Parameters
         ----------
         query: AudioQuery
-        style_id: int
+        style_id: StyleId
         request: fastapi.Request
             接続確立時に受け取ったものをそのまま渡せばよい
             https://fastapi.tiangolo.com/advanced/using-request-directly/
diff --git a/voicevox_engine/core_adapter.py b/voicevox_engine/core_adapter.py
index 3af7c4a67..950784a05 100644
--- a/voicevox_engine/core_adapter.py
+++ b/voicevox_engine/core_adapter.py
@@ -4,6 +4,7 @@
 from numpy import ndarray
 
 from .core_wrapper import CoreWrapper, OldCoreError
+from .model import StyleId
 
 
 class CoreAdapter:
@@ -35,13 +36,15 @@ def supported_devices(self) -> str | None:
             supported_devices = None
         return supported_devices
 
-    def initialize_style_id_synthesis(self, style_id: int, skip_reinit: bool) -> None:
+    def initialize_style_id_synthesis(
+        self, style_id: StyleId, skip_reinit: bool
+    ) -> None:
         """
         指定したスタイルでの音声合成を初期化する。
         何度も実行可能。未実装の場合は何もしない。
         Parameters
         ----------
-        style_id : int
+        style_id : StyleId
             スタイルID
         skip_reinit : bool
             True の場合, 既に初期化済みの話者の再初期化をスキップします
@@ -56,14 +59,16 @@ def initialize_style_id_synthesis(self, style_id: int, skip_reinit: bool) -> Non
         except OldCoreError:
             pass  # コアが古い場合はどうしようもないので何もしない
 
-    def is_initialized_style_id_synthesis(self, style_id: int) -> bool:
+    def is_initialized_style_id_synthesis(self, style_id: StyleId) -> bool:
         """指定したスタイルでの音声合成が初期化されているかどうかを返す"""
         try:
             return self.core.is_model_loaded(style_id)
         except OldCoreError:
             return True  # コアが古い場合はどうしようもないのでTrueを返す
 
-    def safe_yukarin_s_forward(self, phoneme_list_s: ndarray, style_id: int) -> ndarray:
+    def safe_yukarin_s_forward(
+        self, phoneme_list_s: ndarray, style_id: StyleId
+    ) -> ndarray:
         # 「指定スタイルを初期化」「mutexによる安全性」「系列長・データ型に関するアダプター」を提供する
         self.initialize_style_id_synthesis(style_id, skip_reinit=True)
         with self.mutex:
@@ -82,7 +87,7 @@ def safe_yukarin_sa_forward(
         end_accent_list: ndarray,
         start_accent_phrase_list: ndarray,
         end_accent_phrase_list: ndarray,
-        style_id: int,
+        style_id: StyleId,
     ) -> ndarray:
         # 「指定スタイルを初期化」「mutexによる安全性」「系列長・データ型に関するアダプター」を提供する
         self.initialize_style_id_synthesis(style_id, skip_reinit=True)
@@ -100,7 +105,7 @@ def safe_yukarin_sa_forward(
         return f0_list
 
     def safe_decode_forward(
-        self, phoneme: ndarray, f0: ndarray, style_id: int
+        self, phoneme: ndarray, f0: ndarray, style_id: StyleId
     ) -> tuple[ndarray, int]:
         # 「指定スタイルを初期化」「mutexによる安全性」「系列長・データ型に関するアダプター」を提供する
         self.initialize_style_id_synthesis(style_id, skip_reinit=True)
diff --git a/voicevox_engine/dev/tts_engine/mock.py b/voicevox_engine/dev/tts_engine/mock.py
index ea1a0d204..4ad2188b9 100644
--- a/voicevox_engine/dev/tts_engine/mock.py
+++ b/voicevox_engine/dev/tts_engine/mock.py
@@ -6,7 +6,7 @@
 from pyopenjtalk import tts
 from soxr import resample
 
-from ...model import AudioQuery
+from ...model import AudioQuery, StyleId
 from ...tts_pipeline import TTSEngine
 from ...tts_pipeline.tts_engine import to_flatten_moras
 from ..core.mock import MockCoreWrapper
@@ -21,7 +21,7 @@ def __init__(self):
     def synthesis(
         self,
         query: AudioQuery,
-        style_id: int,
+        style_id: StyleId,
         enable_interrogative_upspeak: bool = True,
     ) -> np.ndarray:
         """音声合成用のクエリに含まれる読み仮名に基づいてOpenJTalkで音声波形を生成する"""
diff --git a/voicevox_engine/model.py b/voicevox_engine/model.py
index d8951b0be..a7fb0e522 100644
--- a/voicevox_engine/model.py
+++ b/voicevox_engine/model.py
@@ -1,6 +1,6 @@
 from enum import Enum
 from re import findall, fullmatch
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, NewType, Optional
 
 from pydantic import BaseModel, Field, StrictStr, validator
 
@@ -45,6 +45,9 @@ def __hash__(self):
         return hash(tuple(sorted(items)))
 
 
+StyleId = NewType("StyleId", int)
+
+
 class AudioQuery(BaseModel):
     """
     音声合成用のクエリ
diff --git a/voicevox_engine/morphing.py b/voicevox_engine/morphing.py
index 064908751..00a15c589 100644
--- a/voicevox_engine/morphing.py
+++ b/voicevox_engine/morphing.py
@@ -10,7 +10,7 @@
 from .core_adapter import CoreAdapter
 from .metas.Metas import Speaker, SpeakerStyle, SpeakerSupportPermittedSynthesisMorphing
 from .metas.MetasStore import construct_lookup
-from .model import AudioQuery, MorphableTargetInfo, StyleIdNotFoundError
+from .model import AudioQuery, MorphableTargetInfo, StyleId, StyleIdNotFoundError
 from .tts_pipeline import TTSEngine
 
 
@@ -132,8 +132,8 @@ def synthesis_morphing_parameter(
     engine: TTSEngine,
     core: CoreAdapter,
     query: AudioQuery,
-    base_speaker: int,
-    target_speaker: int,
+    base_speaker: StyleId,
+    target_speaker: StyleId,
 ) -> MorphingParameter:
     query = deepcopy(query)
 
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index a3804dc32..e152da6e5 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -7,7 +7,7 @@
 
 from ..core_adapter import CoreAdapter
 from ..core_wrapper import CoreWrapper
-from ..model import AccentPhrase, AudioQuery, Mora
+from ..model import AccentPhrase, AudioQuery, Mora, StyleId
 from .acoustic_feature_extractor import Phoneme
 from .mora_list import openjtalk_mora2text
 from .text_analyzer import text_to_accent_phrases
@@ -252,7 +252,7 @@ def __init__(self, core: CoreWrapper):
         # NOTE: self._coreは将来的に消す予定
 
     def replace_phoneme_length(
-        self, accent_phrases: list[AccentPhrase], style_id: int
+        self, accent_phrases: list[AccentPhrase], style_id: StyleId
     ) -> list[AccentPhrase]:
         """アクセント句系列に含まれるモーラの音素長属性をスタイルに合わせて更新する"""
         # モーラ系列を抽出する
@@ -282,7 +282,7 @@ def replace_phoneme_length(
         return accent_phrases
 
     def replace_mora_pitch(
-        self, accent_phrases: list[AccentPhrase], style_id: int
+        self, accent_phrases: list[AccentPhrase], style_id: StyleId
     ) -> list[AccentPhrase]:
         """
         accent_phrasesの音高(ピッチ)を設定する
@@ -290,7 +290,7 @@ def replace_mora_pitch(
         ----------
         accent_phrases : List[AccentPhrase]
             アクセント句モデルのリスト
-        style_id : int
+        style_id : StyleId
             スタイルID
         Returns
         -------
@@ -420,7 +420,7 @@ def _create_one_hot(accent_phrase: AccentPhrase, position: int) -> ndarray:
         return accent_phrases
 
     def replace_mora_data(
-        self, accent_phrases: list[AccentPhrase], style_id: int
+        self, accent_phrases: list[AccentPhrase], style_id: StyleId
     ) -> list[AccentPhrase]:
         """アクセント句系列の音素長・モーラ音高をスタイルIDに基づいて更新する"""
         return self.replace_mora_pitch(
@@ -430,7 +430,7 @@ def replace_mora_data(
             style_id=style_id,
         )
 
-    def create_accent_phrases(self, text: str, style_id: int) -> list[AccentPhrase]:
+    def create_accent_phrases(self, text: str, style_id: StyleId) -> list[AccentPhrase]:
         """テキストからアクセント句系列を生成し、スタイルIDに基づいてその音素長・モーラ音高を更新する"""
         # 音素とアクセントの推定
         accent_phrases = text_to_accent_phrases(text)
@@ -445,7 +445,7 @@ def create_accent_phrases(self, text: str, style_id: int) -> list[AccentPhrase]:
     def synthesis(
         self,
         query: AudioQuery,
-        style_id: int,
+        style_id: StyleId,
         enable_interrogative_upspeak: bool = True,
     ) -> ndarray:
         """音声合成用のクエリ・スタイルID・疑問文語尾自動調整フラグに基づいて音声波形を生成する"""

From 79a5a3f084cc42817591386f779e09796789ba99 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Tue, 2 Jan 2024 23:01:57 +0900
Subject: [PATCH 101/177] =?UTF-8?q?=E8=BF=BD=E5=8A=A0:=20`OjtPhoneme`=20?=
 =?UTF-8?q?=E5=9E=8B=20(#958)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add: `OjtPhoneme` 型

* fix: lint

* add: `OjtUnknown` 型

* Apply suggestions from code review

---------

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 voicevox_engine/tts_pipeline/text_analyzer.py | 47 +++++++++++++++++--
 1 file changed, 44 insertions(+), 3 deletions(-)

diff --git a/voicevox_engine/tts_pipeline/text_analyzer.py b/voicevox_engine/tts_pipeline/text_analyzer.py
index cd91783e7..3e4294702 100644
--- a/voicevox_engine/tts_pipeline/text_analyzer.py
+++ b/voicevox_engine/tts_pipeline/text_analyzer.py
@@ -1,13 +1,53 @@
 import re
 from dataclasses import dataclass
 from itertools import chain
-from typing import Self
+from typing import Literal, Self
 
 import pyopenjtalk
 
 from ..model import AccentPhrase, Mora
 from .mora_list import openjtalk_mora2text
 
+OjtVowel = Literal[
+    "A", "E", "I", "N", "O", "U", "a", "cl", "e", "i", "o", "pau", "sil", "u"
+]
+OjtConsonant = Literal[
+    "b",
+    "by",
+    "ch",
+    "d",
+    "dy",
+    "f",
+    "g",
+    "gw",
+    "gy",
+    "h",
+    "hy",
+    "j",
+    "k",
+    "kw",
+    "ky",
+    "m",
+    "my",
+    "n",
+    "ny",
+    "p",
+    "py",
+    "r",
+    "ry",
+    "s",
+    "sh",
+    "t",
+    "ts",
+    "ty",
+    "v",
+    "w",
+    "y",
+    "z",
+]
+OjtUnknown = Literal["xx"]
+OjtPhoneme = OjtVowel | OjtConsonant | OjtUnknown
+
 
 @dataclass
 class Label:
@@ -41,9 +81,10 @@ def from_feature(cls, feature: str) -> Self:
         return cls(contexts=contexts)
 
     @property
-    def phoneme(self):
+    def phoneme(self) -> OjtPhoneme:
         """このラベルに含まれる音素。子音 or 母音 (無音含む)。"""
-        return self.contexts["p3"]
+        # FIXME: バリデーションする
+        return self.contexts["p3"]  # type: ignore
 
     def is_pause(self):
         """このラベルが無音 (silent/pause) であれば True、そうでなければ False を返す"""

From b4bc5b054666dd91d380ae677eadc298dbf6da67 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Tue, 2 Jan 2024 23:27:20 +0900
Subject: [PATCH 102/177] =?UTF-8?q?=E8=BF=BD=E5=8A=A0:=20=E9=9F=B3?=
 =?UTF-8?q?=E7=B4=A0=20Literal=20=E5=9E=8B=20(#942)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add: 音素Literal型

* Sequence -> tuple

* `tuple[Vowel | Consonant, ...]`としないとダメだった

---------

Co-authored-by: Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp>
---
 .../acoustic_feature_extractor.py             | 99 +++++++++++++++++--
 1 file changed, 93 insertions(+), 6 deletions(-)

diff --git a/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py b/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
index b979809dd..ecb3104bf 100644
--- a/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
+++ b/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
@@ -1,12 +1,97 @@
+from typing import Literal
+
 import numpy
 
+# NOTE: `Vowel` は母音 (a/i/u/e/o の有声・無声) + 無音 pau + 撥音 N ("ん") + 促音 cl ("っ")
+Vowel = Literal["pau", "A", "E", "I", "N", "O", "U", "a", "cl", "e", "i", "o", "u"]
+Consonant = Literal[
+    "b",
+    "by",
+    "ch",
+    "d",
+    "dy",
+    "f",
+    "g",
+    "gw",
+    "gy",
+    "h",
+    "hy",
+    "j",
+    "k",
+    "kw",
+    "ky",
+    "m",
+    "my",
+    "n",
+    "ny",
+    "p",
+    "py",
+    "r",
+    "ry",
+    "s",
+    "sh",
+    "t",
+    "ts",
+    "ty",
+    "v",
+    "w",
+    "y",
+    "z",
+]
+
 # 音素のリスト
-_P_LIST1 = ("pau", "A", "E", "I", "N", "O", "U", "a", "b", "by")
-_P_LIST2 = ("ch", "cl", "d", "dy", "e", "f", "g", "gw", "gy", "h")
-_P_LIST3 = ("hy", "i", "j", "k", "kw", "ky", "m", "my", "n", "ny")
-_P_LIST4 = ("o", "p", "py", "r", "ry", "s", "sh", "t", "ts", "ty")
-_P_LIST5 = ("u", "v", "w", "y", "z")
-_PHONEME_LIST = _P_LIST1 + _P_LIST2 + _P_LIST3 + _P_LIST4 + _P_LIST5
+_P_LIST1: tuple[Vowel | Consonant, ...] = (
+    "pau",
+    "A",
+    "E",
+    "I",
+    "N",
+    "O",
+    "U",
+    "a",
+    "b",
+    "by",
+)
+_P_LIST2: tuple[Vowel | Consonant, ...] = (
+    "ch",
+    "cl",
+    "d",
+    "dy",
+    "e",
+    "f",
+    "g",
+    "gw",
+    "gy",
+    "h",
+)
+_P_LIST3: tuple[Vowel | Consonant, ...] = (
+    "hy",
+    "i",
+    "j",
+    "k",
+    "kw",
+    "ky",
+    "m",
+    "my",
+    "n",
+    "ny",
+)
+_P_LIST4: tuple[Vowel | Consonant, ...] = (
+    "o",
+    "p",
+    "py",
+    "r",
+    "ry",
+    "s",
+    "sh",
+    "t",
+    "ts",
+    "ty",
+)
+_P_LIST5: tuple[Vowel | Consonant, ...] = ("u", "v", "w", "y", "z")
+_PHONEME_LIST: tuple[Vowel | Consonant, ...] = (
+    _P_LIST1 + _P_LIST2 + _P_LIST3 + _P_LIST4 + _P_LIST5
+)
 
 # 音素リストの要素数
 _NUM_PHONEME = len(_PHONEME_LIST)
@@ -24,6 +109,8 @@ def __init__(self, phoneme: str):
             phoneme = "pau"
 
         self.phoneme = phoneme
+        # TODO: `phoneme` で受け入れ可能な文字列を型で保証
+        # self.phoneme: Vowel | Consonant = phoneme
 
     @property
     def phoneme_id(self) -> int:

From dc597c06dcfbb0b24f8a4c1521e8c7652b12b38b Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Wed, 3 Jan 2024 12:43:02 +0900
Subject: [PATCH 103/177] =?UTF-8?q?audio=5Fquery=E3=81=AEe2e=E3=83=86?=
 =?UTF-8?q?=E3=82=B9=E3=83=88=20(#967)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ...\343\201\247\343\201\215\343\202\213.json" | 60 +++++++++++++++++++
 ...\343\201\247\343\201\215\343\202\213.json" | 60 +++++++++++++++++++
 test/e2e/conftest.py                          |  2 +-
 test/e2e/test_audio_query.py                  | 29 +++++++++
 4 files changed, 150 insertions(+), 1 deletion(-)
 create mode 100644 "test/e2e/__snapshots__/test_audio_query/test_speaker\343\202\222\346\214\207\345\256\232\343\201\227\343\201\246\343\202\202\351\237\263\345\243\260\345\220\210\346\210\220\343\202\257\343\202\250\343\203\252\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json"
 create mode 100644 "test/e2e/__snapshots__/test_audio_query/test_style_id\343\202\222\346\214\207\345\256\232\343\201\227\343\201\246\351\237\263\345\243\260\345\220\210\346\210\220\343\202\257\343\202\250\343\203\252\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json"
 create mode 100644 test/e2e/test_audio_query.py

diff --git "a/test/e2e/__snapshots__/test_audio_query/test_speaker\343\202\222\346\214\207\345\256\232\343\201\227\343\201\246\343\202\202\351\237\263\345\243\260\345\220\210\346\210\220\343\202\257\343\202\250\343\203\252\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json" "b/test/e2e/__snapshots__/test_audio_query/test_speaker\343\202\222\346\214\207\345\256\232\343\201\227\343\201\246\343\202\202\351\237\263\345\243\260\345\220\210\346\210\220\343\202\257\343\202\250\343\203\252\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json"
new file mode 100644
index 000000000..e47234dce
--- /dev/null
+++ "b/test/e2e/__snapshots__/test_audio_query/test_speaker\343\202\222\346\214\207\345\256\232\343\201\227\343\201\246\343\202\202\351\237\263\345\243\260\345\220\210\346\210\220\343\202\257\343\202\250\343\203\252\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json"
@@ -0,0 +1,60 @@
+{
+  "accent_phrases": [
+    {
+      "accent": 1,
+      "is_interrogative": false,
+      "moras": [
+        {
+          "consonant": "t",
+          "consonant_length": 2.31,
+          "pitch": 3.38,
+          "text": "テ",
+          "vowel": "e",
+          "vowel_length": 0.88
+        },
+        {
+          "consonant": "s",
+          "consonant_length": 2.19,
+          "pitch": 0.0,
+          "text": "ス",
+          "vowel": "U",
+          "vowel_length": 0.38
+        },
+        {
+          "consonant": "t",
+          "consonant_length": 2.31,
+          "pitch": 4.19,
+          "text": "ト",
+          "vowel": "o",
+          "vowel_length": 1.88
+        },
+        {
+          "consonant": "d",
+          "consonant_length": 0.75,
+          "pitch": 1.62,
+          "text": "デ",
+          "vowel": "e",
+          "vowel_length": 0.88
+        },
+        {
+          "consonant": "s",
+          "consonant_length": 2.19,
+          "pitch": 0.0,
+          "text": "ス",
+          "vowel": "U",
+          "vowel_length": 0.38
+        }
+      ],
+      "pause_mora": null
+    }
+  ],
+  "intonationScale": 1.0,
+  "kana": "テ'_ストデ_ス",
+  "outputSamplingRate": 24000,
+  "outputStereo": false,
+  "pitchScale": 0.0,
+  "postPhonemeLength": 0.1,
+  "prePhonemeLength": 0.1,
+  "speedScale": 1.0,
+  "volumeScale": 1.0
+}
diff --git "a/test/e2e/__snapshots__/test_audio_query/test_style_id\343\202\222\346\214\207\345\256\232\343\201\227\343\201\246\351\237\263\345\243\260\345\220\210\346\210\220\343\202\257\343\202\250\343\203\252\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json" "b/test/e2e/__snapshots__/test_audio_query/test_style_id\343\202\222\346\214\207\345\256\232\343\201\227\343\201\246\351\237\263\345\243\260\345\220\210\346\210\220\343\202\257\343\202\250\343\203\252\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json"
new file mode 100644
index 000000000..e47234dce
--- /dev/null
+++ "b/test/e2e/__snapshots__/test_audio_query/test_style_id\343\202\222\346\214\207\345\256\232\343\201\227\343\201\246\351\237\263\345\243\260\345\220\210\346\210\220\343\202\257\343\202\250\343\203\252\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json"
@@ -0,0 +1,60 @@
+{
+  "accent_phrases": [
+    {
+      "accent": 1,
+      "is_interrogative": false,
+      "moras": [
+        {
+          "consonant": "t",
+          "consonant_length": 2.31,
+          "pitch": 3.38,
+          "text": "テ",
+          "vowel": "e",
+          "vowel_length": 0.88
+        },
+        {
+          "consonant": "s",
+          "consonant_length": 2.19,
+          "pitch": 0.0,
+          "text": "ス",
+          "vowel": "U",
+          "vowel_length": 0.38
+        },
+        {
+          "consonant": "t",
+          "consonant_length": 2.31,
+          "pitch": 4.19,
+          "text": "ト",
+          "vowel": "o",
+          "vowel_length": 1.88
+        },
+        {
+          "consonant": "d",
+          "consonant_length": 0.75,
+          "pitch": 1.62,
+          "text": "デ",
+          "vowel": "e",
+          "vowel_length": 0.88
+        },
+        {
+          "consonant": "s",
+          "consonant_length": 2.19,
+          "pitch": 0.0,
+          "text": "ス",
+          "vowel": "U",
+          "vowel_length": 0.38
+        }
+      ],
+      "pause_mora": null
+    }
+  ],
+  "intonationScale": 1.0,
+  "kana": "テ'_ストデ_ス",
+  "outputSamplingRate": 24000,
+  "outputStereo": false,
+  "pitchScale": 0.0,
+  "postPhonemeLength": 0.1,
+  "prePhonemeLength": 0.1,
+  "speedScale": 1.0,
+  "volumeScale": 1.0
+}
diff --git a/test/e2e/conftest.py b/test/e2e/conftest.py
index 7981d1457..6f7fda132 100644
--- a/test/e2e/conftest.py
+++ b/test/e2e/conftest.py
@@ -28,7 +28,7 @@ def snapshot_json(snapshot: SnapshotAssertion) -> SnapshotAssertion:
 
 @pytest.fixture(scope="session")
 def app_params():
-    cores = initialize_cores(use_gpu=False)
+    cores = initialize_cores(use_gpu=False, enable_mock=True)
     tts_engines = make_tts_engines_from_cores(cores)
     latest_core_version = get_latest_core_version(versions=list(tts_engines.keys()))
     setting_loader = SettingLoader(Path("./not_exist.yaml"))
diff --git a/test/e2e/test_audio_query.py b/test/e2e/test_audio_query.py
new file mode 100644
index 000000000..0be032894
--- /dev/null
+++ b/test/e2e/test_audio_query.py
@@ -0,0 +1,29 @@
+"""
+AudioQuery APIのテスト
+"""
+
+from fastapi.testclient import TestClient
+from syrupy.extensions.json import JSONSnapshotExtension
+
+
+def test_style_idを指定して音声合成クエリが取得できる(
+    client: TestClient, snapshot_json: JSONSnapshotExtension
+) -> None:
+    response = client.post("/audio_query", params={"text": "テストです", "style_id": 0})
+    assert response.status_code == 200
+    assert snapshot_json == response.json()
+
+
+def test_speakerを指定しても音声合成クエリが取得できる(
+    client: TestClient, snapshot_json: JSONSnapshotExtension
+) -> None:
+    response = client.post("/audio_query", params={"text": "テストです", "speaker": 0})
+    assert response.status_code == 200
+    assert snapshot_json == response.json()
+
+
+def test_style_idとspeakerを両方指定するとエラー(client: TestClient) -> None:
+    response = client.post(
+        "/audio_query", params={"text": "テストです", "style_id": 0, "speaker": 0}
+    )
+    assert response.status_code == 400

From 6452c89d97df30900b2edbea9d900094c5e02f7a Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 3 Jan 2024 12:46:52 +0900
Subject: [PATCH 104/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E5=90=8D?=
 =?UTF-8?q?=E5=89=8D=E4=BB=98=E3=81=8D=E5=BC=95=E6=95=B0=E5=89=8A=E9=99=A4?=
 =?UTF-8?q?=20(#968)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

refactor: 名前付き引数削除
---
 run.py                                     | 43 +++++++---------------
 test/test_mock_tts_engine.py               | 12 ++----
 test/test_tts_engine.py                    |  6 +--
 voicevox_engine/morphing.py                |  4 +-
 voicevox_engine/tts_pipeline/tts_engine.py |  9 +----
 5 files changed, 23 insertions(+), 51 deletions(-)

diff --git a/run.py b/run.py
index 778a5ba5e..33a2b15ca 100644
--- a/run.py
+++ b/run.py
@@ -281,7 +281,7 @@ def audio_query(
         style_id = get_style_id_from_deprecated(style_id=style_id, speaker_id=speaker)
         engine = get_engine(core_version)
         core = get_core(core_version)
-        accent_phrases = engine.create_accent_phrases(text, style_id=style_id)
+        accent_phrases = engine.create_accent_phrases(text, style_id)
         return AudioQuery(
             accent_phrases=accent_phrases,
             speedScale=1,
@@ -323,7 +323,7 @@ def audio_query_from_preset(
             raise HTTPException(status_code=422, detail="該当するプリセットIDが見つかりません")
 
         accent_phrases = engine.create_accent_phrases(
-            text, style_id=StyleId(selected_preset.style_id)
+            text, StyleId(selected_preset.style_id)
         )
         return AudioQuery(
             accent_phrases=accent_phrases,
@@ -376,13 +376,11 @@ def accent_phrases(
                     status_code=400,
                     detail=ParseKanaBadRequest(err).dict(),
                 )
-            accent_phrases = engine.replace_mora_data(
-                accent_phrases=accent_phrases, style_id=style_id
-            )
+            accent_phrases = engine.replace_mora_data(accent_phrases, style_id)
 
             return accent_phrases
         else:
-            return engine.create_accent_phrases(text, style_id=style_id)
+            return engine.create_accent_phrases(text, style_id)
 
     @app.post(
         "/mora_data",
@@ -398,7 +396,7 @@ def mora_data(
     ) -> list[AccentPhrase]:
         style_id = get_style_id_from_deprecated(style_id=style_id, speaker_id=speaker)
         engine = get_engine(core_version)
-        return engine.replace_mora_data(accent_phrases, style_id=style_id)
+        return engine.replace_mora_data(accent_phrases, style_id)
 
     @app.post(
         "/mora_length",
@@ -414,9 +412,7 @@ def mora_length(
     ) -> list[AccentPhrase]:
         style_id = get_style_id_from_deprecated(style_id=style_id, speaker_id=speaker)
         engine = get_engine(core_version)
-        return engine.replace_phoneme_length(
-            accent_phrases=accent_phrases, style_id=style_id
-        )
+        return engine.replace_phoneme_length(accent_phrases, style_id)
 
     @app.post(
         "/mora_pitch",
@@ -432,9 +428,7 @@ def mora_pitch(
     ) -> list[AccentPhrase]:
         style_id = get_style_id_from_deprecated(style_id=style_id, speaker_id=speaker)
         engine = get_engine(core_version)
-        return engine.replace_mora_pitch(
-            accent_phrases=accent_phrases, style_id=style_id
-        )
+        return engine.replace_mora_pitch(accent_phrases, style_id)
 
     @app.post(
         "/synthesis",
@@ -462,9 +456,7 @@ def synthesis(
         style_id = get_style_id_from_deprecated(style_id=style_id, speaker_id=speaker)
         engine = get_engine(core_version)
         wave = engine.synthesis(
-            query=query,
-            style_id=style_id,
-            enable_interrogative_upspeak=enable_interrogative_upspeak,
+            query, style_id, enable_interrogative_upspeak=enable_interrogative_upspeak
         )
 
         with NamedTemporaryFile(delete=False) as f:
@@ -505,10 +497,7 @@ def cancellable_synthesis(
                 detail="実験的機能はデフォルトで無効になっています。使用するには引数を指定してください。",
             )
         f_name = cancellable_engine._synthesis_impl(
-            query=query,
-            style_id=style_id,
-            request=request,
-            core_version=core_version,
+            query, style_id, request, core_version=core_version
         )
         if f_name == "":
             raise HTTPException(status_code=422, detail="不明なバージョンです")
@@ -553,7 +542,7 @@ def multi_synthesis(
                         )
 
                     with TemporaryFile() as wav_file:
-                        wave = engine.synthesis(query=queries[i], style_id=style_id)
+                        wave = engine.synthesis(queries[i], style_id)
                         soundfile.write(
                             file=wav_file,
                             data=wave,
@@ -1012,9 +1001,7 @@ def initialize_style_id(
         実行しなくても他のAPIは使用できますが、初回実行時に時間がかかることがあります。
         """
         core = get_core(core_version)
-        core.initialize_style_id_synthesis(
-            style_id=StyleId(style_id), skip_reinit=skip_reinit
-        )
+        core.initialize_style_id_synthesis(StyleId(style_id), skip_reinit=skip_reinit)
         return Response(status_code=204)
 
     @app.get("/is_initialized_style_id", response_model=bool, tags=["その他"])
@@ -1046,9 +1033,7 @@ def initialize_speaker(
             stacklevel=1,
         )
         return initialize_style_id(
-            style_id=StyleId(speaker),
-            skip_reinit=skip_reinit,
-            core_version=core_version,
+            StyleId(speaker), skip_reinit=skip_reinit, core_version=core_version
         )
 
     @app.get(
@@ -1066,9 +1051,7 @@ def is_initialized_speaker(
             "使用しているAPI(/is_initialize_speaker)は非推奨です。/is_initialized_style_idを利用してください。",
             stacklevel=1,
         )
-        return is_initialized_style_id(
-            style_id=StyleId(speaker), core_version=core_version
-        )
+        return is_initialized_style_id(StyleId(speaker), core_version=core_version)
 
     @app.get("/user_dict", response_model=dict[str, UserDictWord], tags=["ユーザー辞書"])
     def get_user_dict_words() -> dict[str, UserDictWord]:
diff --git a/test/test_mock_tts_engine.py b/test/test_mock_tts_engine.py
index 0cb1c902e..f45cf1130 100644
--- a/test/test_mock_tts_engine.py
+++ b/test/test_mock_tts_engine.py
@@ -48,17 +48,11 @@ def setUp(self):
 
     def test_replace_phoneme_length(self):
         """`.replace_phoneme_length()` がエラー無く生成をおこなう"""
-        self.engine.replace_phoneme_length(
-            accent_phrases=self.accent_phrases_hello_hiho,
-            style_id=StyleId(0),
-        )
+        self.engine.replace_phoneme_length(self.accent_phrases_hello_hiho, StyleId(0))
 
     def test_replace_mora_pitch(self):
         """`.replace_mora_pitch()` がエラー無く生成をおこなう"""
-        self.engine.replace_mora_pitch(
-            accent_phrases=self.accent_phrases_hello_hiho,
-            style_id=StyleId(0),
-        )
+        self.engine.replace_mora_pitch(self.accent_phrases_hello_hiho, StyleId(0))
 
     def test_synthesis(self):
         """`.synthesis()` がエラー無く生成をおこなう"""
@@ -75,5 +69,5 @@ def test_synthesis(self):
                 outputStereo=False,
                 kana=create_kana(self.accent_phrases_hello_hiho),
             ),
-            style_id=StyleId(0),
+            StyleId(0),
         )
diff --git a/test/test_tts_engine.py b/test/test_tts_engine.py
index e2a3a2a82..27cfb9a45 100644
--- a/test/test_tts_engine.py
+++ b/test/test_tts_engine.py
@@ -585,7 +585,7 @@ def test_replace_phoneme_length(self):
         # Inputs
         hello_hiho = deepcopy(self.accent_phrases_hello_hiho)
         # Outputs & Indirect Outputs（yukarin_sに渡される値）
-        result = self.tts_engine.replace_phoneme_length(hello_hiho, style_id=StyleId(1))
+        result = self.tts_engine.replace_phoneme_length(hello_hiho, StyleId(1))
         yukarin_s_args = self.yukarin_s_mock.call_args[1]
         list_length = yukarin_s_args["length"]
         phoneme_list = yukarin_s_args["phoneme_list"]
@@ -628,7 +628,7 @@ def test_replace_mora_pitch(self):
         # Inputs
         phrases: list = []
         # Outputs
-        result = self.tts_engine.replace_mora_pitch(phrases, style_id=StyleId(1))
+        result = self.tts_engine.replace_mora_pitch(phrases, StyleId(1))
         # Expects
         true_result: list = []
         # Tests
@@ -637,7 +637,7 @@ def test_replace_mora_pitch(self):
         # Inputs
         hello_hiho = deepcopy(self.accent_phrases_hello_hiho)
         # Outputs & Indirect Outputs（yukarin_saに渡される値）
-        result = self.tts_engine.replace_mora_pitch(hello_hiho, style_id=StyleId(1))
+        result = self.tts_engine.replace_mora_pitch(hello_hiho, StyleId(1))
         yukarin_sa_args = self.yukarin_sa_mock.call_args[1]
         list_length = yukarin_sa_args["length"]
         vowel_phoneme_list = yukarin_sa_args["vowel_phoneme_list"][0]
diff --git a/voicevox_engine/morphing.py b/voicevox_engine/morphing.py
index 00a15c589..33f4b2196 100644
--- a/voicevox_engine/morphing.py
+++ b/voicevox_engine/morphing.py
@@ -143,8 +143,8 @@ def synthesis_morphing_parameter(
     # WORLDに掛けるため合成はモノラルで行う
     query.outputStereo = False
 
-    base_wave = engine.synthesis(query=query, style_id=base_speaker).astype("float")
-    target_wave = engine.synthesis(query=query, style_id=target_speaker).astype("float")
+    base_wave = engine.synthesis(query, style_id=base_speaker).astype("float")
+    target_wave = engine.synthesis(query, style_id=target_speaker).astype("float")
 
     return create_morphing_parameter(
         base_wave=base_wave,
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index e152da6e5..ddd02afec 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -424,9 +424,7 @@ def replace_mora_data(
     ) -> list[AccentPhrase]:
         """アクセント句系列の音素長・モーラ音高をスタイルIDに基づいて更新する"""
         return self.replace_mora_pitch(
-            accent_phrases=self.replace_phoneme_length(
-                accent_phrases=accent_phrases, style_id=style_id
-            ),
+            accent_phrases=self.replace_phoneme_length(accent_phrases, style_id),
             style_id=style_id,
         )
 
@@ -436,10 +434,7 @@ def create_accent_phrases(self, text: str, style_id: StyleId) -> list[AccentPhra
         accent_phrases = text_to_accent_phrases(text)
 
         # 音素長・モーラ音高の推定と更新
-        accent_phrases = self.replace_mora_data(
-            accent_phrases=accent_phrases,
-            style_id=style_id,
-        )
+        accent_phrases = self.replace_mora_data(accent_phrases, style_id)
         return accent_phrases
 
     def synthesis(

From 08816486a4918fe574da65f9d11beb863f569de5 Mon Sep 17 00:00:00 2001
From: sabonerune <102559104+sabonerune@users.noreply.github.com>
Date: Wed, 3 Jan 2024 18:01:04 +0900
Subject: [PATCH 105/177] =?UTF-8?q?FIX:=20=E6=9C=AA=E5=87=A6=E7=90=86?=
 =?UTF-8?q?=E3=81=AE=E4=BE=8B=E5=A4=96=E3=81=8C=E7=99=BA=E7=94=9F=E3=81=99?=
 =?UTF-8?q?=E3=82=8B=E3=81=A8CORSMiddleware=E3=81=8C=E9=81=A9=E7=94=A8?=
 =?UTF-8?q?=E3=81=95=E3=82=8C=E3=81=AA=E3=81=84=E5=95=8F=E9=A1=8C=E3=81=AB?=
 =?UTF-8?q?=E5=AF=BE=E3=81=99=E3=82=8B=E3=83=AF=E3=83=BC=E3=82=AF=E3=82=A2?=
 =?UTF-8?q?=E3=83=A9=E3=82=A6=E3=83=B3=E3=83=89=20(#969)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 run.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/run.py b/run.py
index 33a2b15ca..ba29fc05e 100644
--- a/run.py
+++ b/run.py
@@ -14,7 +14,7 @@
 from io import BytesIO, TextIOWrapper
 from pathlib import Path
 from tempfile import NamedTemporaryFile, TemporaryFile
-from typing import Annotated, Any, Dict, List, Optional
+from typing import Annotated, Any, Optional
 
 import soundfile
 import uvicorn
@@ -25,6 +25,7 @@
 from fastapi.templating import Jinja2Templates
 from pydantic import ValidationError
 from starlette.background import BackgroundTask
+from starlette.middleware.errors import ServerErrorMiddleware
 from starlette.responses import FileResponse
 
 from voicevox_engine import __version__
@@ -145,15 +146,15 @@ def set_output_log_utf8() -> None:
 
 
 def generate_app(
-    tts_engines: Dict[str, TTSEngine],
-    cores: Dict[str, CoreAdapter],
+    tts_engines: dict[str, TTSEngine],
+    cores: dict[str, CoreAdapter],
     latest_core_version: str,
     setting_loader: SettingLoader,
     preset_manager: PresetManager,
     cancellable_engine: CancellableEngine | None = None,
     root_dir: Optional[Path] = None,
     cors_policy_mode: CorsPolicyMode = CorsPolicyMode.localapps,
-    allow_origin: Optional[List[str]] = None,
+    allow_origin: Optional[list[str]] = None,
     disable_mutable_api: bool = False,
 ) -> FastAPI:
     if root_dir is None:
@@ -165,6 +166,16 @@ def generate_app(
         version=__version__,
     )
 
+    # 未処理の例外が発生するとCORSMiddlewareが適用されない問題に対するワークアラウンド
+    # ref: https://github.com/VOICEVOX/voicevox_engine/issues/91
+    async def global_execution_handler(request: Request, exc: Exception) -> Response:
+        return JSONResponse(
+            status_code=500,
+            content="Internal Server Error",
+        )
+
+    app.add_middleware(ServerErrorMiddleware, handler=global_execution_handler)
+
     # CORS用のヘッダを生成するミドルウェア
     localhost_regex = "^https?://(localhost|127\\.0\\.0\\.1)(:[0-9]+)?$"
     compiled_localhost_regex = re.compile(localhost_regex)

From 938f8aea519bcc8625f901a3f720f72a1e8ad9c5 Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Wed, 3 Jan 2024 18:54:56 +0900
Subject: [PATCH 106/177] =?UTF-8?q?API=E3=81=AEstyle=5Fid=E3=81=AE?=
 =?UTF-8?q?=E5=9E=8B=E3=82=92StyleId=E5=9E=8B=E3=81=AB=20(#966)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* run.pyのとこStyleIdに

* API引数をStyleId化

* 間違えて追加してしまっていた

* 漏れ

* StyleIdの場所変更

* pysen

* 自動import箇所が意図とあってなさそうだった

* ワークアラウンドなことをコメント、FIXMEをコメント
---
 run.py                                     | 64 +++++++++++-----------
 test/test_mock_tts_engine.py               |  3 +-
 test/test_tts_engine.py                    |  3 +-
 test/test_tts_engine_base.py               |  3 +-
 voicevox_engine/cancellable_engine.py      |  3 +-
 voicevox_engine/core_adapter.py            |  2 +-
 voicevox_engine/dev/tts_engine/mock.py     |  3 +-
 voicevox_engine/metas/Metas.py             | 10 +++-
 voicevox_engine/model.py                   |  5 +-
 voicevox_engine/morphing.py                | 11 +++-
 voicevox_engine/preset/Preset.py           |  4 +-
 voicevox_engine/tts_pipeline/tts_engine.py |  3 +-
 12 files changed, 63 insertions(+), 51 deletions(-)

diff --git a/run.py b/run.py
index ba29fc05e..dfb848bdf 100644
--- a/run.py
+++ b/run.py
@@ -35,6 +35,7 @@
 from voicevox_engine.engine_manifest import EngineManifestLoader
 from voicevox_engine.engine_manifest.EngineManifest import EngineManifest
 from voicevox_engine.library_manager import LibraryManager
+from voicevox_engine.metas.Metas import StyleId
 from voicevox_engine.metas.MetasStore import MetasStore, construct_lookup
 from voicevox_engine.model import (
     AccentPhrase,
@@ -47,7 +48,6 @@
     ParseKanaError,
     Speaker,
     SpeakerInfo,
-    StyleId,
     StyleIdNotFoundError,
     SupportedDevicesInfo,
     UserDictWord,
@@ -93,7 +93,7 @@
 
 
 def get_style_id_from_deprecated(
-    style_id: int | None, speaker_id: int | None
+    style_id: StyleId | None, speaker_id: StyleId | None
 ) -> StyleId:
     """
     style_idとspeaker_id両方ともNoneかNoneでないかをチェックし、
@@ -101,9 +101,9 @@ def get_style_id_from_deprecated(
     """
     if speaker_id is not None and style_id is None:
         warnings.warn("speakerは非推奨です。style_idを利用してください。", stacklevel=1)
-        return StyleId(speaker_id)
+        return speaker_id
     elif style_id is not None and speaker_id is None:
-        return StyleId(style_id)
+        return style_id
     raise HTTPException(
         status_code=400, detail="speakerとstyle_idが両方とも存在しないか、両方とも存在しています。"
     )
@@ -282,8 +282,8 @@ def get_core(core_version: Optional[str]) -> CoreAdapter:
     )
     def audio_query(
         text: str,
-        style_id: int | None = Query(default=None),  # noqa: B008
-        speaker: int | None = Query(default=None, deprecated=True),  # noqa: B008
+        style_id: StyleId | None = Query(default=None),  # noqa: B008
+        speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
         core_version: str | None = None,
     ) -> AudioQuery:
         """
@@ -333,9 +333,7 @@ def audio_query_from_preset(
         else:
             raise HTTPException(status_code=422, detail="該当するプリセットIDが見つかりません")
 
-        accent_phrases = engine.create_accent_phrases(
-            text, StyleId(selected_preset.style_id)
-        )
+        accent_phrases = engine.create_accent_phrases(text, selected_preset.style_id)
         return AudioQuery(
             accent_phrases=accent_phrases,
             speedScale=selected_preset.speedScale,
@@ -363,8 +361,8 @@ def audio_query_from_preset(
     )
     def accent_phrases(
         text: str,
-        style_id: int | None = Query(default=None),  # noqa: B008
-        speaker: int | None = Query(default=None, deprecated=True),  # noqa: B008
+        style_id: StyleId | None = Query(default=None),  # noqa: B008
+        speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
         is_kana: bool = False,
         core_version: str | None = None,
     ) -> list[AccentPhrase]:
@@ -401,8 +399,8 @@ def accent_phrases(
     )
     def mora_data(
         accent_phrases: list[AccentPhrase],
-        style_id: int | None = Query(default=None),  # noqa: B008
-        speaker: int | None = Query(default=None, deprecated=True),  # noqa: B008
+        style_id: StyleId | None = Query(default=None),  # noqa: B008
+        speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
         core_version: str | None = None,
     ) -> list[AccentPhrase]:
         style_id = get_style_id_from_deprecated(style_id=style_id, speaker_id=speaker)
@@ -417,8 +415,8 @@ def mora_data(
     )
     def mora_length(
         accent_phrases: list[AccentPhrase],
-        style_id: int | None = Query(default=None),  # noqa: B008
-        speaker: int | None = Query(default=None, deprecated=True),  # noqa: B008
+        style_id: StyleId | None = Query(default=None),  # noqa: B008
+        speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
         core_version: str | None = None,
     ) -> list[AccentPhrase]:
         style_id = get_style_id_from_deprecated(style_id=style_id, speaker_id=speaker)
@@ -433,8 +431,8 @@ def mora_length(
     )
     def mora_pitch(
         accent_phrases: list[AccentPhrase],
-        style_id: int | None = Query(default=None),  # noqa: B008
-        speaker: int | None = Query(default=None, deprecated=True),  # noqa: B008
+        style_id: StyleId | None = Query(default=None),  # noqa: B008
+        speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
         core_version: str | None = None,
     ) -> list[AccentPhrase]:
         style_id = get_style_id_from_deprecated(style_id=style_id, speaker_id=speaker)
@@ -456,8 +454,8 @@ def mora_pitch(
     )
     def synthesis(
         query: AudioQuery,
-        style_id: int | None = Query(default=None),  # noqa: B008
-        speaker: int | None = Query(default=None, deprecated=True),  # noqa: B008
+        style_id: StyleId | None = Query(default=None),  # noqa: B008
+        speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
         enable_interrogative_upspeak: bool = Query(  # noqa: B008
             default=True,
             description="疑問系のテキストが与えられたら語尾を自動調整する",
@@ -497,8 +495,8 @@ def synthesis(
     def cancellable_synthesis(
         query: AudioQuery,
         request: Request,
-        style_id: int | None = Query(default=None),  # noqa: B008
-        speaker: int | None = Query(default=None, deprecated=True),  # noqa: B008
+        style_id: StyleId | None = Query(default=None),  # noqa: B008
+        speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
         core_version: str | None = None,
     ) -> FileResponse:
         style_id = get_style_id_from_deprecated(style_id=style_id, speaker_id=speaker)
@@ -536,8 +534,8 @@ def cancellable_synthesis(
     )
     def multi_synthesis(
         queries: list[AudioQuery],
-        style_id: int | None = Query(default=None),  # noqa: B008
-        speaker: int | None = Query(default=None, deprecated=True),  # noqa: B008
+        style_id: StyleId | None = Query(default=None),  # noqa: B008
+        speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
         core_version: str | None = None,
     ) -> FileResponse:
         style_id = get_style_id_from_deprecated(style_id=style_id, speaker_id=speaker)
@@ -576,7 +574,7 @@ def multi_synthesis(
         summary="指定した話者に対してエンジン内の話者がモーフィングが可能か判定する",
     )
     def morphable_targets(
-        base_speakers: list[int],
+        base_speakers: list[int],  # FIXME: StyleId型にする
         core_version: str | None = None,
     ) -> list[dict[str, MorphableTargetInfo]]:
         """
@@ -617,7 +615,7 @@ def morphable_targets(
     )
     def _synthesis_morphing(
         query: AudioQuery,
-        base_speaker: int,
+        base_speaker: int,  # FIXME: StyleId型にする
         target_speaker: int,
         morph_rate: float = Query(..., ge=0.0, le=1.0),  # noqa: B008
         core_version: str | None = None,
@@ -1001,7 +999,7 @@ def uninstall_library(library_uuid: str) -> Response:
 
     @app.post("/initialize_style_id", status_code=204, tags=["その他"])
     def initialize_style_id(
-        style_id: int,
+        style_id: StyleId,
         skip_reinit: bool = Query(  # noqa: B008
             False, description="既に初期化済みのスタイルの再初期化をスキップするかどうか"
         ),
@@ -1012,23 +1010,23 @@ def initialize_style_id(
         実行しなくても他のAPIは使用できますが、初回実行時に時間がかかることがあります。
         """
         core = get_core(core_version)
-        core.initialize_style_id_synthesis(StyleId(style_id), skip_reinit=skip_reinit)
+        core.initialize_style_id_synthesis(style_id, skip_reinit=skip_reinit)
         return Response(status_code=204)
 
     @app.get("/is_initialized_style_id", response_model=bool, tags=["その他"])
     def is_initialized_style_id(
-        style_id: int,
+        style_id: StyleId,
         core_version: str | None = None,
     ) -> bool:
         """
         指定されたstyle_idのスタイルが初期化されているかどうかを返します。
         """
         core = get_core(core_version)
-        return core.is_initialized_style_id_synthesis(StyleId(style_id))
+        return core.is_initialized_style_id_synthesis(style_id)
 
     @app.post("/initialize_speaker", status_code=204, tags=["その他"], deprecated=True)
     def initialize_speaker(
-        speaker: int,
+        speaker: StyleId,
         skip_reinit: bool = Query(  # noqa: B008
             False, description="既に初期化済みの話者の再初期化をスキップするかどうか"
         ),
@@ -1044,14 +1042,14 @@ def initialize_speaker(
             stacklevel=1,
         )
         return initialize_style_id(
-            StyleId(speaker), skip_reinit=skip_reinit, core_version=core_version
+            speaker, skip_reinit=skip_reinit, core_version=core_version
         )
 
     @app.get(
         "/is_initialized_speaker", response_model=bool, tags=["その他"], deprecated=True
     )
     def is_initialized_speaker(
-        speaker: int,
+        speaker: StyleId,
         core_version: str | None = None,
     ) -> bool:
         """
@@ -1062,7 +1060,7 @@ def is_initialized_speaker(
             "使用しているAPI(/is_initialize_speaker)は非推奨です。/is_initialized_style_idを利用してください。",
             stacklevel=1,
         )
-        return is_initialized_style_id(StyleId(speaker), core_version=core_version)
+        return is_initialized_style_id(speaker, core_version=core_version)
 
     @app.get("/user_dict", response_model=dict[str, UserDictWord], tags=["ユーザー辞書"])
     def get_user_dict_words() -> dict[str, UserDictWord]:
diff --git a/test/test_mock_tts_engine.py b/test/test_mock_tts_engine.py
index f45cf1130..4161d2e51 100644
--- a/test/test_mock_tts_engine.py
+++ b/test/test_mock_tts_engine.py
@@ -1,7 +1,8 @@
 from unittest import TestCase
 
 from voicevox_engine.dev.tts_engine import MockTTSEngine
-from voicevox_engine.model import AccentPhrase, AudioQuery, Mora, StyleId
+from voicevox_engine.metas.Metas import StyleId
+from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
 from voicevox_engine.tts_pipeline.kana_converter import create_kana
 
 
diff --git a/test/test_tts_engine.py b/test/test_tts_engine.py
index 27cfb9a45..b0eeb0db4 100644
--- a/test/test_tts_engine.py
+++ b/test/test_tts_engine.py
@@ -5,7 +5,8 @@
 
 import numpy
 
-from voicevox_engine.model import AccentPhrase, AudioQuery, Mora, StyleId
+from voicevox_engine.metas.Metas import StyleId
+from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
 from voicevox_engine.tts_pipeline import TTSEngine
 from voicevox_engine.tts_pipeline.acoustic_feature_extractor import Phoneme
 from voicevox_engine.tts_pipeline.tts_engine import (
diff --git a/test/test_tts_engine_base.py b/test/test_tts_engine_base.py
index 74e0eee38..a81e5f301 100644
--- a/test/test_tts_engine_base.py
+++ b/test/test_tts_engine_base.py
@@ -1,7 +1,8 @@
 from unittest import TestCase
 
 from voicevox_engine.dev.core.mock import MockCoreWrapper
-from voicevox_engine.model import AccentPhrase, Mora, StyleId
+from voicevox_engine.metas.Metas import StyleId
+from voicevox_engine.model import AccentPhrase, Mora
 from voicevox_engine.tts_pipeline import TTSEngine
 from voicevox_engine.tts_pipeline.tts_engine import (
     apply_interrogative_upspeak,  # FIXME: この関数を使うテストをTTSEngine用のテストに移動する
diff --git a/voicevox_engine/cancellable_engine.py b/voicevox_engine/cancellable_engine.py
index bc75fd2f9..8bbc95d6d 100644
--- a/voicevox_engine/cancellable_engine.py
+++ b/voicevox_engine/cancellable_engine.py
@@ -17,7 +17,8 @@
 from fastapi import HTTPException, Request
 
 from .core_initializer import initialize_cores
-from .model import AudioQuery, StyleId
+from .metas.Metas import StyleId
+from .model import AudioQuery
 from .tts_pipeline import make_tts_engines_from_cores
 from .utility import get_latest_core_version
 
diff --git a/voicevox_engine/core_adapter.py b/voicevox_engine/core_adapter.py
index 950784a05..1d9bc1ed6 100644
--- a/voicevox_engine/core_adapter.py
+++ b/voicevox_engine/core_adapter.py
@@ -4,7 +4,7 @@
 from numpy import ndarray
 
 from .core_wrapper import CoreWrapper, OldCoreError
-from .model import StyleId
+from .metas.Metas import StyleId
 
 
 class CoreAdapter:
diff --git a/voicevox_engine/dev/tts_engine/mock.py b/voicevox_engine/dev/tts_engine/mock.py
index 4ad2188b9..e6a009c6c 100644
--- a/voicevox_engine/dev/tts_engine/mock.py
+++ b/voicevox_engine/dev/tts_engine/mock.py
@@ -6,7 +6,8 @@
 from pyopenjtalk import tts
 from soxr import resample
 
-from ...model import AudioQuery, StyleId
+from ...metas.Metas import StyleId
+from ...model import AudioQuery
 from ...tts_pipeline import TTSEngine
 from ...tts_pipeline.tts_engine import to_flatten_moras
 from ..core.mock import MockCoreWrapper
diff --git a/voicevox_engine/metas/Metas.py b/voicevox_engine/metas/Metas.py
index 4eb1e0a46..bc615a16f 100644
--- a/voicevox_engine/metas/Metas.py
+++ b/voicevox_engine/metas/Metas.py
@@ -1,8 +1,12 @@
 from enum import Enum
-from typing import List, Optional
+from typing import List, NewType, Optional
 
 from pydantic import BaseModel, Field
 
+# NOTE: 循環importを防ぐためにとりあえずここに書いている
+# FIXME: 他のmodelに依存せず、全modelから参照できる場所に配置する
+StyleId = NewType("StyleId", int)
+
 
 class SpeakerStyle(BaseModel):
     """
@@ -10,7 +14,7 @@ class SpeakerStyle(BaseModel):
     """
 
     name: str = Field(title="スタイル名")
-    id: int = Field(title="スタイルID")
+    id: StyleId = Field(title="スタイルID")
 
 
 class SpeakerSupportPermittedSynthesisMorphing(str, Enum):
@@ -67,7 +71,7 @@ class StyleInfo(BaseModel):
     スタイルの追加情報
     """
 
-    id: int = Field(title="スタイルID")
+    id: StyleId = Field(title="スタイルID")
     icon: str = Field(title="当該スタイルのアイコンをbase64エンコードしたもの")
     portrait: Optional[str] = Field(title="当該スタイルのportrait.pngをbase64エンコードしたもの")
     voice_samples: List[str] = Field(title="voice_sampleのwavファイルをbase64エンコードしたもの")
diff --git a/voicevox_engine/model.py b/voicevox_engine/model.py
index a7fb0e522..d8951b0be 100644
--- a/voicevox_engine/model.py
+++ b/voicevox_engine/model.py
@@ -1,6 +1,6 @@
 from enum import Enum
 from re import findall, fullmatch
-from typing import Any, Dict, List, NewType, Optional
+from typing import Any, Dict, List, Optional
 
 from pydantic import BaseModel, Field, StrictStr, validator
 
@@ -45,9 +45,6 @@ def __hash__(self):
         return hash(tuple(sorted(items)))
 
 
-StyleId = NewType("StyleId", int)
-
-
 class AudioQuery(BaseModel):
     """
     音声合成用のクエリ
diff --git a/voicevox_engine/morphing.py b/voicevox_engine/morphing.py
index 33f4b2196..d5daaafc7 100644
--- a/voicevox_engine/morphing.py
+++ b/voicevox_engine/morphing.py
@@ -8,9 +8,14 @@
 from soxr import resample
 
 from .core_adapter import CoreAdapter
-from .metas.Metas import Speaker, SpeakerStyle, SpeakerSupportPermittedSynthesisMorphing
+from .metas.Metas import (
+    Speaker,
+    SpeakerStyle,
+    SpeakerSupportPermittedSynthesisMorphing,
+    StyleId,
+)
 from .metas.MetasStore import construct_lookup
-from .model import AudioQuery, MorphableTargetInfo, StyleId, StyleIdNotFoundError
+from .model import AudioQuery, MorphableTargetInfo, StyleIdNotFoundError
 from .tts_pipeline import TTSEngine
 
 
@@ -52,7 +57,7 @@ def create_morphing_parameter(
 def get_morphable_targets(
     speakers: List[Speaker],
     base_speakers: List[int],
-) -> List[Dict[int, MorphableTargetInfo]]:
+) -> List[Dict[StyleId, MorphableTargetInfo]]:
     """
     speakers: 全話者の情報
     base_speakers: モーフィング可能か判定したいベースの話者リスト（スタイルID）
diff --git a/voicevox_engine/preset/Preset.py b/voicevox_engine/preset/Preset.py
index 82dc8daa2..527433c38 100644
--- a/voicevox_engine/preset/Preset.py
+++ b/voicevox_engine/preset/Preset.py
@@ -1,5 +1,7 @@
 from pydantic import BaseModel, Field
 
+from voicevox_engine.metas.Metas import StyleId
+
 
 class Preset(BaseModel):
     """
@@ -9,7 +11,7 @@ class Preset(BaseModel):
     id: int = Field(title="プリセットID")
     name: str = Field(title="プリセット名")
     speaker_uuid: str = Field(title="話者のUUID")
-    style_id: int = Field(title="スタイルID")
+    style_id: StyleId = Field(title="スタイルID")
     speedScale: float = Field(title="全体の話速")
     pitchScale: float = Field(title="全体の音高")
     intonationScale: float = Field(title="全体の抑揚")
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index ddd02afec..86f770d88 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -7,7 +7,8 @@
 
 from ..core_adapter import CoreAdapter
 from ..core_wrapper import CoreWrapper
-from ..model import AccentPhrase, AudioQuery, Mora, StyleId
+from ..metas.Metas import StyleId
+from ..model import AccentPhrase, AudioQuery, Mora
 from .acoustic_feature_extractor import Phoneme
 from .mora_list import openjtalk_mora2text
 from .text_analyzer import text_to_accent_phrases

From bea579248ef69af2ca14110b9c5463c34160dd10 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 3 Jan 2024 19:58:58 +0900
Subject: [PATCH 107/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20TTSEngine=20?=
 =?UTF-8?q?=E3=83=A1=E3=82=BD=E3=83=83=E3=83=89=E3=81=AE=E3=83=AA=E3=83=8D?=
 =?UTF-8?q?=E3=83=BC=E3=83=A0=20(#956)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: `replace_phoneme_length` リネーム

* refactor: `replace_mora_pitch` リネーム

* refactor: `replace_mora_data` リネーム

* refactor: `.synthesis` リネーム

* fix: 名前付き引数
---
 run.py                                     | 12 ++++-----
 test/test_mock_tts_engine.py               | 18 ++++++-------
 test/test_tts_engine.py                    | 10 +++----
 voicevox_engine/cancellable_engine.py      |  4 ++-
 voicevox_engine/dev/tts_engine/mock.py     |  2 +-
 voicevox_engine/morphing.py                |  4 +--
 voicevox_engine/tts_pipeline/tts_engine.py | 31 ++++++----------------
 7 files changed, 34 insertions(+), 47 deletions(-)

diff --git a/run.py b/run.py
index dfb848bdf..01fa2da11 100644
--- a/run.py
+++ b/run.py
@@ -385,7 +385,7 @@ def accent_phrases(
                     status_code=400,
                     detail=ParseKanaBadRequest(err).dict(),
                 )
-            accent_phrases = engine.replace_mora_data(accent_phrases, style_id)
+            accent_phrases = engine.update_length_and_pitch(accent_phrases, style_id)
 
             return accent_phrases
         else:
@@ -405,7 +405,7 @@ def mora_data(
     ) -> list[AccentPhrase]:
         style_id = get_style_id_from_deprecated(style_id=style_id, speaker_id=speaker)
         engine = get_engine(core_version)
-        return engine.replace_mora_data(accent_phrases, style_id)
+        return engine.update_length_and_pitch(accent_phrases, style_id)
 
     @app.post(
         "/mora_length",
@@ -421,7 +421,7 @@ def mora_length(
     ) -> list[AccentPhrase]:
         style_id = get_style_id_from_deprecated(style_id=style_id, speaker_id=speaker)
         engine = get_engine(core_version)
-        return engine.replace_phoneme_length(accent_phrases, style_id)
+        return engine.update_length(accent_phrases, style_id)
 
     @app.post(
         "/mora_pitch",
@@ -437,7 +437,7 @@ def mora_pitch(
     ) -> list[AccentPhrase]:
         style_id = get_style_id_from_deprecated(style_id=style_id, speaker_id=speaker)
         engine = get_engine(core_version)
-        return engine.replace_mora_pitch(accent_phrases, style_id)
+        return engine.update_pitch(accent_phrases, style_id)
 
     @app.post(
         "/synthesis",
@@ -464,7 +464,7 @@ def synthesis(
     ) -> FileResponse:
         style_id = get_style_id_from_deprecated(style_id=style_id, speaker_id=speaker)
         engine = get_engine(core_version)
-        wave = engine.synthesis(
+        wave = engine.synthesize_wave(
             query, style_id, enable_interrogative_upspeak=enable_interrogative_upspeak
         )
 
@@ -551,7 +551,7 @@ def multi_synthesis(
                         )
 
                     with TemporaryFile() as wav_file:
-                        wave = engine.synthesis(queries[i], style_id)
+                        wave = engine.synthesize_wave(queries[i], style_id)
                         soundfile.write(
                             file=wav_file,
                             data=wave,
diff --git a/test/test_mock_tts_engine.py b/test/test_mock_tts_engine.py
index 4161d2e51..152ce78de 100644
--- a/test/test_mock_tts_engine.py
+++ b/test/test_mock_tts_engine.py
@@ -47,17 +47,17 @@ def setUp(self):
         ]
         self.engine = MockTTSEngine()
 
-    def test_replace_phoneme_length(self):
-        """`.replace_phoneme_length()` がエラー無く生成をおこなう"""
-        self.engine.replace_phoneme_length(self.accent_phrases_hello_hiho, StyleId(0))
+    def test_update_length(self):
+        """`.update_length()` がエラー無く生成をおこなう"""
+        self.engine.update_length(self.accent_phrases_hello_hiho, StyleId(0))
 
-    def test_replace_mora_pitch(self):
-        """`.replace_mora_pitch()` がエラー無く生成をおこなう"""
-        self.engine.replace_mora_pitch(self.accent_phrases_hello_hiho, StyleId(0))
+    def test_update_pitch(self):
+        """`.update_pitch()` がエラー無く生成をおこなう"""
+        self.engine.update_pitch(self.accent_phrases_hello_hiho, StyleId(0))
 
-    def test_synthesis(self):
-        """`.synthesis()` がエラー無く生成をおこなう"""
-        self.engine.synthesis(
+    def test_synthesize_wave(self):
+        """`.synthesize_wave()` がエラー無く生成をおこなう"""
+        self.engine.synthesize_wave(
             AudioQuery(
                 accent_phrases=self.accent_phrases_hello_hiho,
                 speedScale=1,
diff --git a/test/test_tts_engine.py b/test/test_tts_engine.py
index b0eeb0db4..50230460e 100644
--- a/test/test_tts_engine.py
+++ b/test/test_tts_engine.py
@@ -582,11 +582,11 @@ def test_pre_process(self):
             is_same_phoneme(phoneme_data_list[phoneme_index], Phoneme("pau"))
         )
 
-    def test_replace_phoneme_length(self):
+    def test_update_length(self):
         # Inputs
         hello_hiho = deepcopy(self.accent_phrases_hello_hiho)
         # Outputs & Indirect Outputs（yukarin_sに渡される値）
-        result = self.tts_engine.replace_phoneme_length(hello_hiho, StyleId(1))
+        result = self.tts_engine.update_length(hello_hiho, StyleId(1))
         yukarin_s_args = self.yukarin_s_mock.call_args[1]
         list_length = yukarin_s_args["length"]
         phoneme_list = yukarin_s_args["phoneme_list"]
@@ -624,12 +624,12 @@ def result_value(i: int) -> float:
         )
         self.assertEqual(result, true_result)
 
-    def test_replace_mora_pitch(self):
+    def test_update_pitch(self):
         # 空のリストでエラーを吐かないか
         # Inputs
         phrases: list = []
         # Outputs
-        result = self.tts_engine.replace_mora_pitch(phrases, StyleId(1))
+        result = self.tts_engine.update_pitch(phrases, StyleId(1))
         # Expects
         true_result: list = []
         # Tests
@@ -638,7 +638,7 @@ def test_replace_mora_pitch(self):
         # Inputs
         hello_hiho = deepcopy(self.accent_phrases_hello_hiho)
         # Outputs & Indirect Outputs（yukarin_saに渡される値）
-        result = self.tts_engine.replace_mora_pitch(hello_hiho, StyleId(1))
+        result = self.tts_engine.update_pitch(hello_hiho, StyleId(1))
         yukarin_sa_args = self.yukarin_sa_mock.call_args[1]
         list_length = yukarin_sa_args["length"]
         vowel_phoneme_list = yukarin_sa_args["vowel_phoneme_list"][0]
diff --git a/voicevox_engine/cancellable_engine.py b/voicevox_engine/cancellable_engine.py
index 8bbc95d6d..63fb7110e 100644
--- a/voicevox_engine/cancellable_engine.py
+++ b/voicevox_engine/cancellable_engine.py
@@ -245,7 +245,9 @@ def start_synthesis_subprocess(
                 sub_proc_con.send("")
                 continue
             # FIXME: enable_interrogative_upspeakフラグをWebAPIから受け渡してくる
-            wave = _engine.synthesis(query, style_id, False)
+            wave = _engine.synthesize_wave(
+                query, style_id, enable_interrogative_upspeak=False
+            )
             with NamedTemporaryFile(delete=False) as f:
                 soundfile.write(
                     file=f, data=wave, samplerate=query.outputSamplingRate, format="WAV"
diff --git a/voicevox_engine/dev/tts_engine/mock.py b/voicevox_engine/dev/tts_engine/mock.py
index e6a009c6c..c91e06aa6 100644
--- a/voicevox_engine/dev/tts_engine/mock.py
+++ b/voicevox_engine/dev/tts_engine/mock.py
@@ -19,7 +19,7 @@ class MockTTSEngine(TTSEngine):
     def __init__(self):
         super().__init__(MockCoreWrapper())
 
-    def synthesis(
+    def synthesize_wave(
         self,
         query: AudioQuery,
         style_id: StyleId,
diff --git a/voicevox_engine/morphing.py b/voicevox_engine/morphing.py
index d5daaafc7..90b97fa63 100644
--- a/voicevox_engine/morphing.py
+++ b/voicevox_engine/morphing.py
@@ -148,8 +148,8 @@ def synthesis_morphing_parameter(
     # WORLDに掛けるため合成はモノラルで行う
     query.outputStereo = False
 
-    base_wave = engine.synthesis(query, style_id=base_speaker).astype("float")
-    target_wave = engine.synthesis(query, style_id=target_speaker).astype("float")
+    base_wave = engine.synthesize_wave(query, style_id=base_speaker).astype("float")
+    target_wave = engine.synthesize_wave(query, style_id=target_speaker).astype("float")
 
     return create_morphing_parameter(
         base_wave=base_wave,
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index 86f770d88..b1e15d4c2 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -252,7 +252,7 @@ def __init__(self, core: CoreWrapper):
         self._core = CoreAdapter(core)
         # NOTE: self._coreは将来的に消す予定
 
-    def replace_phoneme_length(
+    def update_length(
         self, accent_phrases: list[AccentPhrase], style_id: StyleId
     ) -> list[AccentPhrase]:
         """アクセント句系列に含まれるモーラの音素長属性をスタイルに合わせて更新する"""
@@ -282,22 +282,10 @@ def replace_phoneme_length(
 
         return accent_phrases
 
-    def replace_mora_pitch(
+    def update_pitch(
         self, accent_phrases: list[AccentPhrase], style_id: StyleId
     ) -> list[AccentPhrase]:
-        """
-        accent_phrasesの音高(ピッチ)を設定する
-        Parameters
-        ----------
-        accent_phrases : List[AccentPhrase]
-            アクセント句モデルのリスト
-        style_id : StyleId
-            スタイルID
-        Returns
-        -------
-        accent_phrases : List[AccentPhrase]
-            音高(ピッチ)が設定されたアクセント句モデルのリスト
-        """
+        """アクセント句系列に含まれるモーラの音高属性をスタイルに合わせて更新する"""
         # numpy.concatenateが空リストだとエラーを返すのでチェック
         if len(accent_phrases) == 0:
             return []
@@ -420,25 +408,22 @@ def _create_one_hot(accent_phrase: AccentPhrase, position: int) -> ndarray:
 
         return accent_phrases
 
-    def replace_mora_data(
+    def update_length_and_pitch(
         self, accent_phrases: list[AccentPhrase], style_id: StyleId
     ) -> list[AccentPhrase]:
         """アクセント句系列の音素長・モーラ音高をスタイルIDに基づいて更新する"""
-        return self.replace_mora_pitch(
-            accent_phrases=self.replace_phoneme_length(accent_phrases, style_id),
+        return self.update_pitch(
+            accent_phrases=self.update_length(accent_phrases, style_id),
             style_id=style_id,
         )
 
     def create_accent_phrases(self, text: str, style_id: StyleId) -> list[AccentPhrase]:
         """テキストからアクセント句系列を生成し、スタイルIDに基づいてその音素長・モーラ音高を更新する"""
-        # 音素とアクセントの推定
         accent_phrases = text_to_accent_phrases(text)
-
-        # 音素長・モーラ音高の推定と更新
-        accent_phrases = self.replace_mora_data(accent_phrases, style_id)
+        accent_phrases = self.update_length_and_pitch(accent_phrases, style_id)
         return accent_phrases
 
-    def synthesis(
+    def synthesize_wave(
         self,
         query: AudioQuery,
         style_id: StyleId,

From 5d61144da98ba5aa68b7050c75e2f5475d14a60d Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 3 Jan 2024 20:23:09 +0900
Subject: [PATCH 108/177] =?UTF-8?q?=E8=BF=BD=E5=8A=A0:=20=E3=83=86?=
 =?UTF-8?q?=E3=82=AD=E3=82=B9=E3=83=88=E5=88=86=E6=9E=90=E3=81=AE=E3=83=86?=
 =?UTF-8?q?=E3=82=B9=E3=83=88=20(#970)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add: テキスト解析テスト

* fix: feature仕様コメントの移設

* fix: 明示的stub
---
 test/test_acoustic_feature_extractor.py       | 12 +++
 test/test_text_analyzer.py                    | 79 +++++++++++++++++++
 voicevox_engine/tts_pipeline/text_analyzer.py | 10 ++-
 3 files changed, 98 insertions(+), 3 deletions(-)

diff --git a/test/test_acoustic_feature_extractor.py b/test/test_acoustic_feature_extractor.py
index 3840a5c8d..6539541bf 100644
--- a/test/test_acoustic_feature_extractor.py
+++ b/test/test_acoustic_feature_extractor.py
@@ -1,10 +1,22 @@
 from unittest import TestCase
 
+import pytest
+
 from voicevox_engine.tts_pipeline.acoustic_feature_extractor import Phoneme
 
 TRUE_NUM_PHONEME = 45
 
 
+def test_unknown_phoneme():
+    """Unknown音素 `xx` のID取得を拒否する"""
+    # Inputs
+    unknown_phoneme = Phoneme("xx")
+
+    # Tests
+    with pytest.raises(ValueError) as _:
+        _ = unknown_phoneme.phoneme_id
+
+
 class TestPhoneme(TestCase):
     def setUp(self):
         super().setUp()
diff --git a/test/test_text_analyzer.py b/test/test_text_analyzer.py
index a07912f42..2e3bb376a 100644
--- a/test/test_text_analyzer.py
+++ b/test/test_text_analyzer.py
@@ -1,11 +1,13 @@
 from unittest import TestCase
 
+from voicevox_engine.model import AccentPhrase, Mora
 from voicevox_engine.tts_pipeline.text_analyzer import (
     AccentPhraseLabel,
     BreathGroupLabel,
     Label,
     MoraLabel,
     UtteranceLabel,
+    text_to_accent_phrases,
 )
 
 
@@ -316,3 +318,80 @@ def test_phonemes(self):
     def test_features(self):
         """UtteranceLabel に含まれる features をテスト"""
         self.assertEqual(features(self.utterance_hello_hiho), self.test_case_hello_hiho)
+
+
+def _gen_mora(text: str, consonant: str | None, vowel: str) -> Mora:
+    return Mora(
+        text=text,
+        consonant=consonant,
+        consonant_length=0 if consonant else None,
+        vowel=vowel,
+        vowel_length=0,
+        pitch=0,
+    )
+
+
+def test_text_to_accent_phrases_normal():
+    """`text_to_accent_phrases` は正常な日本語文をパースする"""
+    # Inputs
+    text = "こんにちは、ヒホです。"
+    # Expects
+    true_accent_phrases = [
+        AccentPhrase(
+            moras=[
+                _gen_mora("コ", "k", "o"),
+                _gen_mora("ン", None, "N"),
+                _gen_mora("ニ", "n", "i"),
+                _gen_mora("チ", "ch", "i"),
+                _gen_mora("ワ", "w", "a"),
+            ],
+            accent=5,
+            pause_mora=_gen_mora("、", None, "pau"),
+        ),
+        AccentPhrase(
+            moras=[
+                _gen_mora("ヒ", "h", "i"),
+                _gen_mora("ホ", "h", "o"),
+                _gen_mora("デ", "d", "e"),
+                _gen_mora("ス", "s", "U"),
+            ],
+            accent=1,
+            pause_mora=None,
+        ),
+    ]
+    # Outputs
+    accent_phrases = text_to_accent_phrases(text)
+    # Tests
+    assert accent_phrases == true_accent_phrases
+
+
+def stub_unknown_features_koxx(_: str) -> list[str]:
+    """`sil-k-o-xx-sil` に相当する features を常に返す `text_to_features()` のStub"""
+    return [
+        ".^.-sil+.=./A:.+xx+./B:.-._./C:._.+./D:.+._./E:._.!._.-./F:xx_xx#xx_.@xx_.|._./G:._.%._._./H:._./I:.-.@xx+.&.-.|.+./J:._./K:.+.-.",
+        ".^.-k+.=./A:.+1+./B:.-._./C:._.+./D:.+._./E:._.!._.-./F:2_1#0_.@1_.|._./G:._.%._._./H:._./I:.-.@1+.&.-.|.+./J:._./K:.+.-.",
+        ".^.-o+.=./A:.+1+./B:.-._./C:._.+./D:.+._./E:._.!._.-./F:2_1#0_.@1_.|._./G:._.%._._./H:._./I:.-.@1+.&.-.|.+./J:._./K:.+.-.",
+        ".^.-xx+.=./A:.+2+./B:.-._./C:._.+./D:.+._./E:._.!._.-./F:2_1#0_.@1_.|._./G:._.%._._./H:._./I:.-.@1+.&.-.|.+./J:._./K:.+.-.",
+        ".^.-sil+.=./A:.+xx+./B:.-._./C:._.+./D:.+._./E:._.!._.-./F:xx_xx#xx_.@xx_.|._./G:._.%._._./H:._./I:.-.@xx+.&.-.|.+./J:._./K:.+.-.",
+    ]
+
+
+def test_text_to_accent_phrases_unknown():
+    """`text_to_accent_phrases` は unknown 音素を含む features をパースする"""
+    # Expects
+    true_accent_phrases = [
+        AccentPhrase(
+            moras=[
+                _gen_mora("コ", "k", "o"),
+                _gen_mora("xx", None, "xx"),
+            ],
+            accent=1,
+            pause_mora=None,
+        ),
+    ]
+    # Outputs
+    accent_phrases = text_to_accent_phrases(
+        "dummy", text_to_features=stub_unknown_features_koxx
+    )
+    # Tests
+    assert accent_phrases == true_accent_phrases
diff --git a/voicevox_engine/tts_pipeline/text_analyzer.py b/voicevox_engine/tts_pipeline/text_analyzer.py
index 3e4294702..1108703a2 100644
--- a/voicevox_engine/tts_pipeline/text_analyzer.py
+++ b/voicevox_engine/tts_pipeline/text_analyzer.py
@@ -1,7 +1,7 @@
 import re
 from dataclasses import dataclass
 from itertools import chain
-from typing import Literal, Self
+from typing import Callable, Literal, Self
 
 import pyopenjtalk
 
@@ -59,6 +59,7 @@ class Label:
     def from_feature(cls, feature: str) -> Self:
         """OpenJTalk feature から Label インスタンスを生成する"""
         # フルコンテキストラベルの仕様は、http://hts.sp.nitech.ac.jp/?Download の HTS-2.3のJapanese tar.bz2 (126 MB)をダウンロードして、data/lab_format.pdfを見るとリストが見つかります。 # noqa
+        # VOICEVOX ENGINE で利用されている属性: p3 phoneme / a2 moraIdx / f1 n_mora / f2 pos_accent / f3 疑問形 / f5 アクセント句Idx / i3 BreathGroupIdx  # noqa: B950
         result = re.search(
             r"^(?P<p1>.+?)\^(?P<p2>.+?)\-(?P<p3>.+?)\+(?P<p4>.+?)\=(?P<p5>.+?)"
             r"/A\:(?P<a1>.+?)\+(?P<a2>.+?)\+(?P<a3>.+?)"
@@ -332,13 +333,16 @@ def _utterance_to_accent_phrases(utterance: UtteranceLabel) -> list[AccentPhrase
     ]
 
 
-def text_to_accent_phrases(text: str) -> list[AccentPhrase]:
+def text_to_accent_phrases(
+    text: str,
+    text_to_features: Callable[[str], list[str]] = pyopenjtalk.extract_fullcontext,
+) -> list[AccentPhrase]:
     """日本語文からアクセント句系列を生成する"""
     if len(text.strip()) == 0:
         return []
 
     # 日本語文からUtteranceLabelを抽出する
-    features: list[str] = pyopenjtalk.extract_fullcontext(text)  # type: ignore
+    features = text_to_features(text)
     utterance = UtteranceLabel.from_labels(list(map(Label.from_feature, features)))
 
     # ドメインを変換する

From 7c9efb51f751e7dec9817e2de8fb65a9e6939069 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 3 Jan 2024 20:46:01 +0900
Subject: [PATCH 109/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`Label`=20context?=
 =?UTF-8?q?=20=E5=80=8B=E5=88=A5=E5=B1=9E=E6=80=A7=E3=81=AE=E3=82=A2?=
 =?UTF-8?q?=E3=82=AF=E3=82=BB=E3=83=83=E3=82=B5=20(#971)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: context属性

* idx to index

---------

Co-authored-by: Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp>
---
 voicevox_engine/tts_pipeline/text_analyzer.py | 47 +++++++++++++------
 1 file changed, 33 insertions(+), 14 deletions(-)

diff --git a/voicevox_engine/tts_pipeline/text_analyzer.py b/voicevox_engine/tts_pipeline/text_analyzer.py
index 1108703a2..75f7cd24b 100644
--- a/voicevox_engine/tts_pipeline/text_analyzer.py
+++ b/voicevox_engine/tts_pipeline/text_analyzer.py
@@ -87,10 +87,34 @@ def phoneme(self) -> OjtPhoneme:
         # FIXME: バリデーションする
         return self.contexts["p3"]  # type: ignore
 
-    def is_pause(self):
+    @property
+    def mora_index(self) -> int:
+        """アクセント句内におけるモーラのインデックス (1 ~ 49)"""
+        return int(self.contexts["a2"])
+
+    def is_pause(self) -> bool:
         """このラベルが無音 (silent/pause) であれば True、そうでなければ False を返す"""
         return self.contexts["f1"] == "xx"
 
+    @property
+    def accent_position(self) -> int:
+        """アクセント句内でのアクセント位置 (1 ~ 49)"""
+        return int(self.contexts["f2"])
+
+    def is_interrogative(self) -> bool:
+        """疑問形か否か"""
+        return self.contexts["f3"] == "1"
+
+    @property
+    def accent_phrase_index(self) -> str:
+        """BreathGroup内におけるアクセント句のインデックス"""
+        return self.contexts["f5"]
+
+    @property
+    def breath_group_index(self) -> str:
+        """BreathGroupのインデックス"""
+        return self.contexts["i3"]
+
     def __repr__(self):
         return f"<Label phoneme='{self.phoneme}'>"
 
@@ -130,16 +154,15 @@ def from_labels(cls, labels: list[Label]) -> Self:
 
         for label, next_label in zip(labels, labels[1:] + [None]):
             # モーラ抽出を打ち切る（ワークアラウンド、VOICEVOX/voicevox_engine#57）
-            # context a2（モーラ番号）の最大値が 49 であるため、49番目以降のモーラではラベルのモーラ番号を区切りに使えない
-            if int(label.contexts["a2"]) == 49:
+            # mora_index の最大値が 49 であるため、49番目以降のモーラではラベルのモーラ番号を区切りに使えない
+            if label.mora_index == 49:
                 break
 
             # 区切りまでラベル系列を一時保存する
             mora_labels.append(label)
 
             # 一時的なラベル系列を確定させて処理する
-            # a2はアクセント句内でのモーラ番号(1~49)
-            if next_label is None or label.contexts["a2"] != next_label.contexts["a2"]:
+            if next_label is None or label.mora_index != next_label.mora_index:
                 # モーラごとのラベル系列長に基づいて子音と母音を得る
                 if len(mora_labels) == 1:
                     consonant, vowel = None, mora_labels[0]
@@ -154,14 +177,12 @@ def from_labels(cls, labels: list[Label]) -> Self:
                 mora_labels = []
 
         # アクセント位置を決定する
-        # f2はアクセント句のアクセント位置(1~49)
-        accent = int(moras[0].vowel.contexts["f2"])
-        # f2 の値がアクセント句内のモーラ数を超える場合はクリップ（ワークアラウンド、VOICEVOX/voicevox_engine#55 を参照）
+        accent = moras[0].vowel.accent_position
+        # アクセント位置の値がアクセント句内のモーラ数を超える場合はクリップ（ワークアラウンド、VOICEVOX/voicevox_engine#55 を参照）
         accent = accent if accent <= len(moras) else len(moras)
 
         # 疑問文か否か判定する（末尾モーラ母音のcontextに基づく）
-        # f3はアクセント句が疑問文かどうか（1で疑問文）
-        is_interrogative = moras[-1].vowel.contexts["f3"] == "1"
+        is_interrogative = moras[-1].vowel.is_interrogative()
 
         # アクセント句ラベルを生成する
         accent_phrase = cls(
@@ -196,12 +217,10 @@ def from_labels(cls, labels: list[Label]) -> Self:
             accent_labels.append(label)
 
             # 一時的なラベル系列を確定させて処理する
-            # i3はBreathGroupの番号
-            # f5はBreathGroup内でのアクセント句の番号
             if (
                 next_label is None
-                or label.contexts["i3"] != next_label.contexts["i3"]
-                or label.contexts["f5"] != next_label.contexts["f5"]
+                or label.breath_group_index != next_label.breath_group_index
+                or label.accent_phrase_index != next_label.accent_phrase_index
             ):
                 # アクセント句を生成して保存する
                 accent_phrase = AccentPhraseLabel.from_labels(accent_labels)

From 1ab38c3f77bb98385c9af710c4cdaefd1b8c4ff2 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Fri, 5 Jan 2024 13:23:16 +0900
Subject: [PATCH 110/177] =?UTF-8?q?=E5=BB=83=E6=AD=A2:=20`split=5Fmora()`?=
 =?UTF-8?q?=20=E4=B8=8D=E4=BD=BF=E7=94=A8=E8=BF=94=E3=82=8A=E5=80=A4=20(#9?=
 =?UTF-8?q?73)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

remove: `split_mora()` 不使用返り値の廃止
---
 test/test_tts_engine.py                    |  4 +---
 voicevox_engine/tts_pipeline/tts_engine.py | 14 ++++++--------
 2 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/test/test_tts_engine.py b/test/test_tts_engine.py
index 50230460e..6c5ef0c46 100644
--- a/test/test_tts_engine.py
+++ b/test/test_tts_engine.py
@@ -513,12 +513,10 @@ def test_to_flatten_moras(self):
 
     def test_split_mora(self):
         # Outputs
-        consonant_phoneme_list, vowel_phoneme_list, vowel_indexes = split_mora(
+        consonant_phoneme_list, vowel_phoneme_list = split_mora(
             self.phoneme_data_list_hello_hiho
         )
 
-        self.assertEqual(vowel_indexes, [0, 2, 3, 5, 7, 9, 10, 12, 14, 16, 18, 19])
-
         ps = ["pau", "o", "N", "i", "i", "a", "pau", "i", "o", "e", "U", "pau"]
         true_vowel_phoneme_list = [Phoneme(p) for p in ps]
         self.assertTrue(
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index b1e15d4c2..81560087c 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -55,8 +55,8 @@ def to_flatten_phonemes(moras: list[Mora]) -> list[Phoneme]:
 
 def split_mora(
     phoneme_list: list[Phoneme],
-) -> tuple[list[Phoneme | None], list[Phoneme], list[int]]:
-    """音素系列から子音系列・母音系列・母音位置を抽出する"""
+) -> tuple[list[Phoneme | None], list[Phoneme]]:
+    """音素系列から子音系列と母音系列を抽出する"""
     vowel_indexes = [
         i for i, p in enumerate(phoneme_list) if p.phoneme in mora_phoneme_list
     ]
@@ -70,7 +70,7 @@ def split_mora(
         None if post - prev == 1 else phoneme_list[post - 1]
         for prev, post in zip(vowel_indexes[:-1], vowel_indexes[1:])
     ]
-    return consonant_phoneme_list, vowel_phoneme_list, vowel_indexes
+    return consonant_phoneme_list, vowel_phoneme_list
 
 
 def pre_process(
@@ -366,11 +366,9 @@ def _create_one_hot(accent_phrase: AccentPhrase, position: int) -> ndarray:
         end_accent_phrase_list = numpy.array(end_accent_phrase_list, dtype=numpy.int64)
 
         # phonemeに関するデータを取得(変換)する
-        (
-            consonant_phoneme_data_list,
-            vowel_phoneme_data_list,
-            _,
-        ) = split_mora(phoneme_data_list)
+        (consonant_phoneme_data_list, vowel_phoneme_data_list) = split_mora(
+            phoneme_data_list
+        )
 
         # yukarin_sa
         # Phoneme関連のデータをyukarin_sa_forwarderに渡すための最終処理、リスト内のデータをint64に変換する

From ab8a5d622df755d46d0820e49896f53d7523efa5 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Fri, 5 Jan 2024 14:28:41 +0900
Subject: [PATCH 111/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`update=5Flength?=
 =?UTF-8?q?=5Fand=5Fpitch()`=20(#975)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

refactor: `update_length_and_pitch()`
---
 voicevox_engine/tts_pipeline/tts_engine.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index 81560087c..46b4876e1 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -410,10 +410,9 @@ def update_length_and_pitch(
         self, accent_phrases: list[AccentPhrase], style_id: StyleId
     ) -> list[AccentPhrase]:
         """アクセント句系列の音素長・モーラ音高をスタイルIDに基づいて更新する"""
-        return self.update_pitch(
-            accent_phrases=self.update_length(accent_phrases, style_id),
-            style_id=style_id,
-        )
+        accent_phrases = self.update_length(accent_phrases, style_id)
+        accent_phrases = self.update_pitch(accent_phrases, style_id)
+        return accent_phrases
 
     def create_accent_phrases(self, text: str, style_id: StyleId) -> list[AccentPhrase]:
         """テキストからアクセント句系列を生成し、スタイルIDに基づいてその音素長・モーラ音高を更新する"""

From cc884399f1830d9ab43d33dd516972ac0c4afaf3 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Fri, 5 Jan 2024 18:56:46 +0900
Subject: [PATCH 112/177] =?UTF-8?q?=E8=BF=BD=E5=8A=A0:=20=E3=82=B3?=
 =?UTF-8?q?=E3=82=A2=E3=83=A2=E3=83=83=E3=82=AF=E5=87=BA=E5=8A=9B=E5=80=A4?=
 =?UTF-8?q?=E3=81=AE=E7=B5=B1=E4=B8=80=20(#977)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

add: aligned CoreMock
---
 test/test_tts_engine.py | 63 ++++++++++++++++++++++-------------------
 1 file changed, 34 insertions(+), 29 deletions(-)

diff --git a/test/test_tts_engine.py b/test/test_tts_engine.py
index 6c5ef0c46..e3d238eee 100644
--- a/test/test_tts_engine.py
+++ b/test/test_tts_engine.py
@@ -62,7 +62,7 @@ def yukarin_s_mock(
     result = []
     # mockとしての適当な処理、特に意味はない
     for i in range(length):
-        result.append((phoneme_list[i] * 0.5 + style_id).item())
+        result.append(round((phoneme_list[i] * 0.0625 + style_id).item(), 2))
     return numpy.array(result)
 
 
@@ -80,18 +80,21 @@ def yukarin_sa_mock(
     # mockとしての適当な処理、特に意味はない
     for i in range(length):
         result.append(
-            (
+            round(
                 (
-                    vowel_phoneme_list[0][i]
-                    + consonant_phoneme_list[0][i]
-                    + start_accent_list[0][i]
-                    + end_accent_list[0][i]
-                    + start_accent_phrase_list[0][i]
-                    + end_accent_phrase_list[0][i]
-                )
-                * 0.5
-                + style_id
-            ).item()
+                    (
+                        vowel_phoneme_list[0][i]
+                        + consonant_phoneme_list[0][i]
+                        + start_accent_list[0][i]
+                        + end_accent_list[0][i]
+                        + start_accent_phrase_list[0][i]
+                        + end_accent_phrase_list[0][i]
+                    )
+                    * 0.0625
+                    + style_id
+                ).item(),
+                2,
+            )
         )
     return numpy.array(result)[numpy.newaxis]
 
@@ -106,14 +109,9 @@ def decode_mock(
     result = []
     # mockとしての適当な処理、特に意味はない
     for i in range(length):
-        # decode forwardはデータサイズがlengthの256倍になるのでとりあえず256回データをresultに入れる
-        for _ in range(256):
-            result.append(
-                (
-                    f0[i][0] * (numpy.where(phoneme[i] == 1)[0] / phoneme_size)
-                    + style_id
-                ).item()
-            )
+        result += [
+            (f0[i, 0] * (numpy.where(phoneme[i] == 1)[0] / phoneme_size) + style_id)
+        ] * 256
     return numpy.array(result)
 
 
@@ -599,7 +597,7 @@ def test_update_length(self):
         index = 1
 
         def result_value(i: int) -> float:
-            return float(phoneme_list[i] * 0.5 + 1)
+            return round(float(phoneme_list[i] * 0.0625 + 1), 2)
 
         for accent_phrase in true_result:
             moras = accent_phrase.moras
@@ -663,14 +661,21 @@ def result_value(i: int) -> float:
             ]
             if vowel_phoneme_list[i] in unvoiced_mora_phoneme_id_list:
                 return 0
-            return (
-                vowel_phoneme_list[i]
-                + consonant_phoneme_list[i]
-                + start_accent_list[i]
-                + end_accent_list[i]
-                + start_accent_phrase_list[i]
-                + end_accent_phrase_list[i]
-            ) * 0.5 + 1
+            return round(
+                (
+                    (
+                        vowel_phoneme_list[i]
+                        + consonant_phoneme_list[i]
+                        + start_accent_list[i]
+                        + end_accent_list[i]
+                        + start_accent_phrase_list[i]
+                        + end_accent_phrase_list[i]
+                    )
+                    * 0.0625
+                    + 1
+                ),
+                2,
+            )
 
         for accent_phrase in true_result:
             moras = accent_phrase.moras

From 77a3343777d8719687a82ad19fa92f7fde1b5d5d Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Fri, 5 Jan 2024 18:58:04 +0900
Subject: [PATCH 113/177] =?UTF-8?q?=E8=BF=BD=E5=8A=A0:=20kana=20=E5=9E=8B?=
 =?UTF-8?q?=20(#972)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add: kana 型

* refactor: VOICEVOX `Vowel` 共通化

* refactor: `_KanaConsonant` から None 分離

* refactor: `Consonant` 共通化

* refactor: モーラカタカナ文字型

* refactor: `mora_alphabet/katakana`

* refactor: mora prefix

* Update voicevox_engine/tts_pipeline/kana_converter.py

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>

* Update voicevox_engine/tts_pipeline/acoustic_feature_extractor.py

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>

* Update voicevox_engine/tts_pipeline/kana_converter.py

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>

---------

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 test/test_mora_list.py                        |  16 +-
 .../acoustic_feature_extractor.py             |   5 +-
 .../tts_pipeline/kana_converter.py            |  29 +--
 voicevox_engine/tts_pipeline/mora_list.py     | 187 +++++++++++++++++-
 voicevox_engine/tts_pipeline/text_analyzer.py |  14 +-
 voicevox_engine/tts_pipeline/tts_engine.py    |   4 +-
 6 files changed, 217 insertions(+), 38 deletions(-)

diff --git a/test/test_mora_list.py b/test/test_mora_list.py
index a2928205a..5cc497aeb 100644
--- a/test/test_mora_list.py
+++ b/test/test_mora_list.py
@@ -1,20 +1,20 @@
 from unittest import TestCase
 
-from voicevox_engine.tts_pipeline.mora_list import openjtalk_mora2text
+from voicevox_engine.tts_pipeline.mora_list import mora_phonemes_to_mora_kana
 
 
 class TestOpenJTalkMoraList(TestCase):
     def test_mora2text(self):
-        self.assertEqual("ッ", openjtalk_mora2text["cl"])
-        self.assertEqual("ティ", openjtalk_mora2text["ti"])
-        self.assertEqual("トゥ", openjtalk_mora2text["tu"])
-        self.assertEqual("ディ", openjtalk_mora2text["di"])
+        self.assertEqual("ッ", mora_phonemes_to_mora_kana["cl"])
+        self.assertEqual("ティ", mora_phonemes_to_mora_kana["ti"])
+        self.assertEqual("トゥ", mora_phonemes_to_mora_kana["tu"])
+        self.assertEqual("ディ", mora_phonemes_to_mora_kana["di"])
         # GitHub issue #60
-        self.assertEqual("ギェ", openjtalk_mora2text["gye"])
-        self.assertEqual("イェ", openjtalk_mora2text["ye"])
+        self.assertEqual("ギェ", mora_phonemes_to_mora_kana["gye"])
+        self.assertEqual("イェ", mora_phonemes_to_mora_kana["ye"])
 
     def test_mora2text_injective(self):
         """異なるモーラが同じ読みがなに対応しないか確認する"""
-        values = list(openjtalk_mora2text.values())
+        values = list(mora_phonemes_to_mora_kana.values())
         uniq_values = list(set(values))
         self.assertCountEqual(values, uniq_values)
diff --git a/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py b/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
index ecb3104bf..a3e774ec4 100644
--- a/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
+++ b/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
@@ -3,7 +3,10 @@
 import numpy
 
 # NOTE: `Vowel` は母音 (a/i/u/e/o の有声・無声) + 無音 pau + 撥音 N ("ん") + 促音 cl ("っ")
-Vowel = Literal["pau", "A", "E", "I", "N", "O", "U", "a", "cl", "e", "i", "o", "u"]
+# NOTE: 型の名称は暫定的
+BaseVowel = Literal["pau", "N", "a", "cl", "e", "i", "o", "u"]
+Vowel = BaseVowel | Literal["A", "E", "I", "O", "U"]
+
 Consonant = Literal[
     "b",
     "by",
diff --git a/voicevox_engine/tts_pipeline/kana_converter.py b/voicevox_engine/tts_pipeline/kana_converter.py
index abcac68e6..c49ab8461 100644
--- a/voicevox_engine/tts_pipeline/kana_converter.py
+++ b/voicevox_engine/tts_pipeline/kana_converter.py
@@ -17,7 +17,8 @@
 from typing import List, Optional
 
 from ..model import AccentPhrase, Mora, ParseKanaError, ParseKanaErrorCode
-from .mora_list import openjtalk_text2mora
+from .acoustic_feature_extractor import Vowel
+from .mora_list import mora_kana_to_mora_phonemes
 
 _LOOP_LIMIT = 300
 
@@ -28,11 +29,11 @@
 _PAUSE_DELIMITER = "、"  # ポーズ有りアクセント句境界
 _WIDE_INTERROGATION_MARK = "？"  # 疑問形
 
-# AquesTalk 風記法とモーラの対応（音素長・音高 0 初期化）
-_text2mora_with_unvoice = {}
-for text, (consonant, vowel) in openjtalk_text2mora.items():
-    _text2mora_with_unvoice[text] = Mora(
-        text=text,
+# AquesTalk 風記法とモーラの対応。無声母音も含む。（音素長・音高 0 初期化）
+_kana2mora: dict[str, Mora] = {}
+for kana, (consonant, vowel) in mora_kana_to_mora_phonemes.items():
+    _kana2mora[kana] = Mora(
+        text=kana,
         consonant=consonant,
         consonant_length=0 if consonant else None,
         vowel=vowel,
@@ -40,13 +41,15 @@
         pitch=0,
     )
     if vowel in ["a", "i", "u", "e", "o"]:
-        # 「`_` で無声化」の実装
-        # 例: "_ホ" -> "hO"
-        _text2mora_with_unvoice[_UNVOICE_SYMBOL + text] = Mora(
-            text=text,
+        # 「`_` で無声化」の実装。例: "_ホ" -> "hO"
+        # NOTE: 現行の型システムは Conditional Literal + upper に非対応.
+        # FIXME: バリデーションする
+        upper_vowel: Vowel = vowel.upper()  # type: ignore
+        _kana2mora[_UNVOICE_SYMBOL + kana] = Mora(
+            text=kana,
             consonant=consonant,
             consonant_length=0 if consonant else None,
-            vowel=vowel.upper(),
+            vowel=upper_vowel,
             vowel_length=0,
             pitch=0,
         )
@@ -98,13 +101,13 @@ def _text_to_accent_phrase(phrase: str) -> AccentPhrase:
             if phrase[watch_index] == _ACCENT_SYMBOL:
                 break
             stack += phrase[watch_index]
-            if stack in _text2mora_with_unvoice:
+            if stack in _kana2mora:
                 matched_text = stack
         if matched_text is None:
             raise ParseKanaError(ParseKanaErrorCode.UNKNOWN_TEXT, text=stack)
         # push mora
         else:
-            moras.append(_text2mora_with_unvoice[matched_text].copy(deep=True))
+            moras.append(_kana2mora[matched_text].copy(deep=True))
             base_index += len(matched_text)
             stack = ""
             matched_text = None
diff --git a/voicevox_engine/tts_pipeline/mora_list.py b/voicevox_engine/tts_pipeline/mora_list.py
index 1f96658a8..c10931ae2 100644
--- a/voicevox_engine/tts_pipeline/mora_list.py
+++ b/voicevox_engine/tts_pipeline/mora_list.py
@@ -42,7 +42,178 @@
 POSSIBILITY OF SUCH DAMAGE.
 """
 
-_mora_list_minimum: list[tuple[str, str | None, str]] = [
+from typing import Literal
+
+from .acoustic_feature_extractor import BaseVowel, Consonant
+
+# AquesTalk 風記法で記述されるモーラ（無声化 `_` を除く）
+_MoraKana = Literal[
+    "ァ",
+    "ア",
+    "ィ",
+    "イ",
+    "イェ",
+    "ゥ",
+    "ウ",
+    "ウィ",
+    "ウェ",
+    "ウォ",
+    "ェ",
+    "エ",
+    "ォ",
+    "オ",
+    "カ",
+    "ガ",
+    "キ",
+    "キェ",
+    "キャ",
+    "キュ",
+    "キョ",
+    "ギ",
+    "ギェ",
+    "ギャ",
+    "ギュ",
+    "ギョ",
+    "ク",
+    "クヮ",
+    "グ",
+    "グヮ",
+    "ケ",
+    "ゲ",
+    "コ",
+    "ゴ",
+    "サ",
+    "ザ",
+    "シ",
+    "シェ",
+    "シャ",
+    "シュ",
+    "ショ",
+    "ジ",
+    "ジェ",
+    "ジャ",
+    "ジュ",
+    "ジョ",
+    "ス",
+    "スィ",
+    "ズ",
+    "ズィ",
+    "セ",
+    "ゼ",
+    "ソ",
+    "ゾ",
+    "タ",
+    "ダ",
+    "チ",
+    "チェ",
+    "チャ",
+    "チュ",
+    "チョ",
+    "ヂ",
+    "ッ",
+    "ツ",
+    "ツァ",
+    "ツィ",
+    "ツェ",
+    "ツォ",
+    "ヅ",
+    "テ",
+    "ティ",
+    "テャ",
+    "テュ",
+    "テョ",
+    "デ",
+    "ディ",
+    "デェ",
+    "デャ",
+    "デュ",
+    "デョ",
+    "ト",
+    "トゥ",
+    "ド",
+    "ドゥ",
+    "ナ",
+    "ニ",
+    "ニェ",
+    "ニャ",
+    "ニュ",
+    "ニョ",
+    "ヌ",
+    "ネ",
+    "ノ",
+    "ハ",
+    "バ",
+    "パ",
+    "ヒ",
+    "ヒェ",
+    "ヒャ",
+    "ヒュ",
+    "ヒョ",
+    "ビ",
+    "ビェ",
+    "ビャ",
+    "ビュ",
+    "ビョ",
+    "ピ",
+    "ピェ",
+    "ピャ",
+    "ピュ",
+    "ピョ",
+    "フ",
+    "ファ",
+    "フィ",
+    "フェ",
+    "フォ",
+    "ブ",
+    "プ",
+    "ヘ",
+    "ベ",
+    "ペ",
+    "ホ",
+    "ボ",
+    "ポ",
+    "マ",
+    "ミ",
+    "ミェ",
+    "ミャ",
+    "ミュ",
+    "ミョ",
+    "ム",
+    "メ",
+    "モ",
+    "ャ",
+    "ヤ",
+    "ュ",
+    "ユ",
+    "ョ",
+    "ヨ",
+    "ラ",
+    "リ",
+    "リェ",
+    "リャ",
+    "リュ",
+    "リョ",
+    "ル",
+    "レ",
+    "ロ",
+    "ヮ",
+    "ワ",
+    "ヰ",
+    "ヱ",
+    "ヲ",
+    "ン",
+    "ヴ",
+    "ヴァ",
+    "ヴィ",
+    "ヴェ",
+    "ヴォ",
+    "ヴャ",
+    "ヴュ",
+    "ヴョ",
+    "ヶ",
+]
+
+_mora_list_minimum: list[tuple[_MoraKana, Consonant | None, BaseVowel]] = [
     ("ヴォ", "v", "o"),
     ("ヴェ", "v", "e"),
     ("ヴィ", "v", "i"),
@@ -189,7 +360,7 @@
     ("イ", None, "i"),
     ("ア", None, "a"),
 ]
-_mora_list_additional: list[tuple[str, str | None, str]] = [
+_mora_list_additional: list[tuple[_MoraKana, Consonant | None, BaseVowel]] = [
     ("ヴョ", "by", "o"),
     ("ヴュ", "by", "u"),
     ("ヴャ", "by", "a"),
@@ -210,10 +381,12 @@
     ("ァ", None, "a"),
 ]
 
-openjtalk_mora2text = {
-    (consonant or "") + vowel: text for [text, consonant, vowel] in _mora_list_minimum
+# 「hi」→「ヒ」
+mora_phonemes_to_mora_kana: dict[str, _MoraKana] = {
+    (consonant or "") + vowel: kana for [kana, consonant, vowel] in _mora_list_minimum
 }
-openjtalk_text2mora = {
-    text: (consonant, vowel)
-    for [text, consonant, vowel] in _mora_list_minimum + _mora_list_additional
+# 「ヒ」→「hi」
+mora_kana_to_mora_phonemes: dict[_MoraKana, tuple[Consonant | None, BaseVowel]] = {
+    kana: (consonant, vowel)
+    for [kana, consonant, vowel] in _mora_list_minimum + _mora_list_additional
 }
diff --git a/voicevox_engine/tts_pipeline/text_analyzer.py b/voicevox_engine/tts_pipeline/text_analyzer.py
index 75f7cd24b..37fce007b 100644
--- a/voicevox_engine/tts_pipeline/text_analyzer.py
+++ b/voicevox_engine/tts_pipeline/text_analyzer.py
@@ -6,7 +6,7 @@
 import pyopenjtalk
 
 from ..model import AccentPhrase, Mora
-from .mora_list import openjtalk_mora2text
+from .mora_list import mora_phonemes_to_mora_kana
 
 OjtVowel = Literal[
     "A", "E", "I", "N", "O", "U", "a", "cl", "e", "i", "o", "pau", "sil", "u"
@@ -295,15 +295,15 @@ def labels(self) -> list[Label]:
         return labels
 
 
-def mora_to_text(mora: str) -> str:
+def mora_to_text(mora_phonemes: str) -> str:
     """モーラ相当の音素文字系列を日本語カタカナ文へ変換する（例: 'hO' -> 'ホ')"""
-    if mora[-1:] in ["A", "I", "U", "E", "O"]:
+    if mora_phonemes[-1:] in ["A", "I", "U", "E", "O"]:
         # 無声化母音を小文字に
-        mora = mora[:-1] + mora[-1].lower()
-    if mora in openjtalk_mora2text:
-        return openjtalk_mora2text[mora]
+        mora_phonemes = mora_phonemes[:-1] + mora_phonemes[-1].lower()
+    if mora_phonemes in mora_phonemes_to_mora_kana:
+        return mora_phonemes_to_mora_kana[mora_phonemes]
     else:
-        return mora
+        return mora_phonemes
 
 
 def _mora_labels_to_moras(mora_labels: list[MoraLabel]) -> list[Mora]:
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index 46b4876e1..575668f51 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -10,7 +10,7 @@
 from ..metas.Metas import StyleId
 from ..model import AccentPhrase, AudioQuery, Mora
 from .acoustic_feature_extractor import Phoneme
-from .mora_list import openjtalk_mora2text
+from .mora_list import mora_phonemes_to_mora_kana
 from .text_analyzer import text_to_accent_phrases
 
 unvoiced_mora_phoneme_list = ["A", "I", "U", "E", "O", "cl", "pau"]
@@ -107,7 +107,7 @@ def apply_interrogative_upspeak(
         if accent_phrase.is_interrogative and moras[-1].pitch > 0:
             last_mora = copy.deepcopy(moras[-1])
             upspeak_mora = Mora(
-                text=openjtalk_mora2text[last_mora.vowel],
+                text=mora_phonemes_to_mora_kana[last_mora.vowel],
                 consonant=None,
                 consonant_length=None,
                 vowel=last_mora.vowel,

From c14f131f06732f45f0a4a0c29a058904be52d23a Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sat, 6 Jan 2024 21:52:49 +0900
Subject: [PATCH 114/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E3=83=86?=
 =?UTF-8?q?=E3=82=B9=E3=83=88=E5=85=A5=E5=8A=9B=E7=94=9F=E6=88=90=E3=83=A6?=
 =?UTF-8?q?=E3=83=BC=E3=83=86=E3=82=A3=E3=83=AA=E3=83=86=E3=82=A3=20(#979)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: テスト入力 gen util

* refactor: 入力データ関数切り出し
---
 test/test_tts_engine.py | 86 ++++++++++++++++++++---------------------
 1 file changed, 43 insertions(+), 43 deletions(-)

diff --git a/test/test_tts_engine.py b/test/test_tts_engine.py
index e3d238eee..9a663fb80 100644
--- a/test/test_tts_engine.py
+++ b/test/test_tts_engine.py
@@ -1,4 +1,3 @@
-from copy import deepcopy
 from typing import Union
 from unittest import TestCase
 from unittest.mock import Mock
@@ -462,38 +461,40 @@ def test_raw_wave_to_output_wave_without_resample():
     assert numpy.allclose(wave, true_wave)
 
 
+def _gen_hello_hiho_phonemes() -> list[Phoneme]:
+    hello_hiho = "sil k o N n i ch i w a pau h i h o d e s U sil"
+    return [Phoneme(p) for p in hello_hiho.split()]
+
+
+def _gen_hello_hiho_accent_phrases() -> list[AccentPhrase]:
+    return [
+        AccentPhrase(
+            moras=[
+                _gen_mora("コ", "k", 0.0, "o", 0.0, 0.0),
+                _gen_mora("ン", None, None, "N", 0.0, 0.0),
+                _gen_mora("ニ", "n", 0.0, "i", 0.0, 0.0),
+                _gen_mora("チ", "ch", 0.0, "i", 0.0, 0.0),
+                _gen_mora("ワ", "w", 0.0, "a", 0.0, 0.0),
+            ],
+            accent=5,
+            pause_mora=_gen_mora("、", None, None, "pau", 0.0, 0.0),
+        ),
+        AccentPhrase(
+            moras=[
+                _gen_mora("ヒ", "h", 0.0, "i", 0.0, 0.0),
+                _gen_mora("ホ", "h", 0.0, "o", 0.0, 0.0),
+                _gen_mora("デ", "d", 0.0, "e", 0.0, 0.0),
+                _gen_mora("ス", "s", 0.0, "U", 0.0, 0.0),
+            ],
+            accent=1,
+            pause_mora=None,
+        ),
+    ]
+
+
 class TestTTSEngine(TestCase):
     def setUp(self):
         super().setUp()
-        self.str_list_hello_hiho = (
-            "sil k o N n i ch i w a pau h i h o d e s U sil".split()
-        )
-        self.phoneme_data_list_hello_hiho = [
-            Phoneme(p) for p in "pau k o N n i ch i w a pau h i h o d e s U pau".split()
-        ]
-        self.accent_phrases_hello_hiho = [
-            AccentPhrase(
-                moras=[
-                    _gen_mora("コ", "k", 0.0, "o", 0.0, 0.0),
-                    _gen_mora("ン", None, None, "N", 0.0, 0.0),
-                    _gen_mora("ニ", "n", 0.0, "i", 0.0, 0.0),
-                    _gen_mora("チ", "ch", 0.0, "i", 0.0, 0.0),
-                    _gen_mora("ワ", "w", 0.0, "a", 0.0, 0.0),
-                ],
-                accent=5,
-                pause_mora=_gen_mora("、", None, None, "pau", 0.0, 0.0),
-            ),
-            AccentPhrase(
-                moras=[
-                    _gen_mora("ヒ", "h", 0.0, "i", 0.0, 0.0),
-                    _gen_mora("ホ", "h", 0.0, "o", 0.0, 0.0),
-                    _gen_mora("デ", "d", 0.0, "e", 0.0, 0.0),
-                    _gen_mora("ス", "s", 0.0, "U", 0.0, 0.0),
-                ],
-                accent=1,
-                pause_mora=None,
-            ),
-        ]
         core = MockCore()
         self.yukarin_s_mock = core.yukarin_s_forward
         self.yukarin_sa_mock = core.yukarin_sa_forward
@@ -501,18 +502,19 @@ def setUp(self):
         self.tts_engine = TTSEngine(core=core)  # type: ignore[arg-type]
 
     def test_to_flatten_moras(self):
-        flatten_moras = to_flatten_moras(self.accent_phrases_hello_hiho)
+        flatten_moras = to_flatten_moras(_gen_hello_hiho_accent_phrases())
+        true_accent_phrases_hello_hiho = _gen_hello_hiho_accent_phrases()
         self.assertEqual(
             flatten_moras,
-            self.accent_phrases_hello_hiho[0].moras
-            + [self.accent_phrases_hello_hiho[0].pause_mora]
-            + self.accent_phrases_hello_hiho[1].moras,
+            true_accent_phrases_hello_hiho[0].moras
+            + [true_accent_phrases_hello_hiho[0].pause_mora]
+            + true_accent_phrases_hello_hiho[1].moras,
         )
 
     def test_split_mora(self):
         # Outputs
         consonant_phoneme_list, vowel_phoneme_list = split_mora(
-            self.phoneme_data_list_hello_hiho
+            _gen_hello_hiho_phonemes()
         )
 
         ps = ["pau", "o", "N", "i", "i", "a", "pau", "i", "o", "e", "U", "pau"]
@@ -541,15 +543,13 @@ def test_split_mora(self):
         )
 
     def test_pre_process(self):
-        flatten_moras, phoneme_data_list = pre_process(
-            deepcopy(self.accent_phrases_hello_hiho)
-        )
+        flatten_moras, phoneme_data_list = pre_process(_gen_hello_hiho_accent_phrases())
 
         mora_index = 0
         phoneme_index = 1
 
         self.assertTrue(is_same_phoneme(phoneme_data_list[0], Phoneme("pau")))
-        for accent_phrase in self.accent_phrases_hello_hiho:
+        for accent_phrase in _gen_hello_hiho_accent_phrases():
             moras = accent_phrase.moras
             for mora in moras:
                 self.assertEqual(flatten_moras[mora_index], mora)
@@ -580,7 +580,7 @@ def test_pre_process(self):
 
     def test_update_length(self):
         # Inputs
-        hello_hiho = deepcopy(self.accent_phrases_hello_hiho)
+        hello_hiho = _gen_hello_hiho_accent_phrases()
         # Outputs & Indirect Outputs（yukarin_sに渡される値）
         result = self.tts_engine.update_length(hello_hiho, StyleId(1))
         yukarin_s_args = self.yukarin_s_mock.call_args[1]
@@ -593,7 +593,7 @@ def test_update_length(self):
         true_phoneme_list_1 = [0, 23, 30, 4, 28, 21, 10, 21, 42, 7]
         true_phoneme_list_2 = [0, 19, 21, 19, 30, 12, 14, 35, 6, 0]
         true_phoneme_list = true_phoneme_list_1 + true_phoneme_list_2
-        true_result = deepcopy(self.accent_phrases_hello_hiho)
+        true_result = _gen_hello_hiho_accent_phrases()
         index = 1
 
         def result_value(i: int) -> float:
@@ -632,7 +632,7 @@ def test_update_pitch(self):
         self.assertEqual(result, true_result)
 
         # Inputs
-        hello_hiho = deepcopy(self.accent_phrases_hello_hiho)
+        hello_hiho = _gen_hello_hiho_accent_phrases()
         # Outputs & Indirect Outputs（yukarin_saに渡される値）
         result = self.tts_engine.update_pitch(hello_hiho, StyleId(1))
         yukarin_sa_args = self.yukarin_sa_mock.call_args[1]
@@ -651,7 +651,7 @@ def test_update_pitch(self):
         true_accent_ends = numpy.array([0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0])
         true_phrase_starts = numpy.array([0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0])
         true_phrase_ends = numpy.array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0])
-        true_result = deepcopy(self.accent_phrases_hello_hiho)
+        true_result = _gen_hello_hiho_accent_phrases()
         index = 1
 
         def result_value(i: int) -> float:

From 0e4baea71f03fc6bcd0ebb2e974bbb947fb3e92c Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Sun, 7 Jan 2024 06:36:30 +0900
Subject: [PATCH 115/177] =?UTF-8?q?=E3=83=A2=E3=83=BC=E3=83=95=E3=82=A3?=
 =?UTF-8?q?=E3=83=B3=E3=82=B0=E9=96=A2=E4=BF=82=E3=81=A7=E3=80=81=E3=82=B9?=
 =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=ABID=E3=81=AA=E3=81=AE=E3=81=ABspeaker?=
 =?UTF-8?q?=E3=81=AB=E3=81=AA=E3=81=A3=E3=81=A6=E3=81=84=E3=82=8B=E3=81=A8?=
 =?UTF-8?q?=E3=81=93=E3=82=8D=E3=82=92style=5Fid=E3=81=AB=E5=A4=89?=
 =?UTF-8?q?=E6=9B=B4=20(#826)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* モーフィング関係で、スタイルIDなのにspeakerになっているところをstyle_idに変更

* NOTE化

* run.pyのとこStyleIdに

* API引数をStyleId化

* 間違えて追加してしまっていた

* get_style_id_from_deprecated周りの変更

* audio_queryのe2eテスト

* stash

* 漏れ

* さらなる漏れの修正

* StyleIdの場所移動

* StyleIdの場所変更

* pysen

* 自動import箇所が意図とあってなさそうだった

* なぜか再代入しても大丈夫なようになっていた。。。

* to deprecated_speaker

* タイポ。。
---
 README.md                                     | 19 ++--
 engine_manifest.json                          |  2 +-
 run.py                                        | 96 ++++++++++++-------
 .../engine_manifest/EngineManifest.py         |  2 +-
 voicevox_engine/metas/MetasStore.py           |  7 +-
 voicevox_engine/morphing.py                   | 32 +++----
 6 files changed, 95 insertions(+), 63 deletions(-)

diff --git a/README.md b/README.md
index a4022d39f..f30df9b4a 100644
--- a/README.md
+++ b/README.md
@@ -76,6 +76,7 @@ curl -s \
 ### 読み方を AquesTalk 風記法で取得・修正
 
 #### AquesTalk 風記法
+
 <!-- NOTE: この節は静的リンクとして運用中なので変更しない方が良い(voicevox_engine#816) -->
 
 「**AquesTalk 風記法**」はカタカナと記号だけで読み方を指定する記法です。[AquesTalk 本家の記法](https://www.a-quest.com/archive/manual/siyo_onseikigou.pdf)とは一部が異なります。  
@@ -90,7 +91,7 @@ AquesTalk 風記法は次のルールに従います：
 #### AquesTalk 風記法のサンプルコード
 
 `/audio_query`のレスポンスにはエンジンが判断した読み方が[AquesTalk 風記法](#aquestalk-風記法)で記述されます。  
-これを修正することで音声の読み仮名やアクセントを制御できます。  
+これを修正することで音声の読み仮名やアクセントを制御できます。
 
 ```bash
 # 読ませたい文章をutf-8でtext.txtに書き出す
@@ -245,12 +246,12 @@ curl -s \
 - `id`は重複してはいけません
 - エンジン起動後にファイルを書き換えるとエンジンに反映されます
 
-### 2 人の話者でモーフィングするサンプルコード
+### 2 種類のスタイルでモーフィングするサンプルコード
 
-`/synthesis_morphing`では、2 人の話者でそれぞれ合成された音声を元に、モーフィングした音声を生成します。
+`/synthesis_morphing`では、2 種類のスタイルでそれぞれ合成された音声を元に、モーフィングした音声を生成します。
 
 ```bash
-echo -n "モーフィングを利用することで、２つの声を混ぜることができます。" > text.txt
+echo -n "モーフィングを利用することで、２種類の声を混ぜることができます。" > text.txt
 
 curl -s \
     -X POST \
@@ -258,7 +259,7 @@ curl -s \
     --get --data-urlencode text@text.txt \
     > query.json
 
-# 元の話者での合成結果
+# 元のスタイルでの合成結果
 curl -s \
     -H "Content-Type: application/json" \
     -X POST \
@@ -268,22 +269,22 @@ curl -s \
 
 export MORPH_RATE=0.5
 
-# 話者2人分の音声合成+WORLDによる音声分析が入るため時間が掛かるので注意
+# スタイル2種類分の音声合成+WORLDによる音声分析が入るため時間が掛かるので注意
 curl -s \
     -H "Content-Type: application/json" \
     -X POST \
     -d @query.json \
-    "127.0.0.1:50021/synthesis_morphing?base_speaker=0&target_speaker=1&morph_rate=$MORPH_RATE" \
+    "127.0.0.1:50021/synthesis_morphing?base_style_id=0&target_style_id=1&morph_rate=$MORPH_RATE" \
     > audio.wav
 
 export MORPH_RATE=0.9
 
-# query、base_speaker、target_speakerが同じ場合はキャッシュが使用されるため比較的高速に生成される
+# query、base_style_id、target_style_idが同じ場合はキャッシュが使用されるため比較的高速に生成される
 curl -s \
     -H "Content-Type: application/json" \
     -X POST \
     -d @query.json \
-    "127.0.0.1:50021/synthesis_morphing?base_speaker=0&target_speaker=1&morph_rate=$MORPH_RATE" \
+    "127.0.0.1:50021/synthesis_morphing?base_style_id=0&target_style_id=1&morph_rate=$MORPH_RATE" \
     > audio.wav
 ```
 
diff --git a/engine_manifest.json b/engine_manifest.json
index 2ceaaabea..f4a5dfaa1 100644
--- a/engine_manifest.json
+++ b/engine_manifest.json
@@ -52,7 +52,7 @@
         "synthesis_morphing" : {
             "type": "bool",
             "value": true,
-            "name": "2人の話者でモーフィングした音声を合成"
+            "name": "2種類のスタイルでモーフィングした音声を合成"
         },
         "manage_library": {
             "type": "bool",
diff --git a/run.py b/run.py
index 01fa2da11..39e401681 100644
--- a/run.py
+++ b/run.py
@@ -14,7 +14,7 @@
 from io import BytesIO, TextIOWrapper
 from pathlib import Path
 from tempfile import NamedTemporaryFile, TemporaryFile
-from typing import Annotated, Any, Optional
+from typing import Annotated, Any, Optional, TypeVar
 
 import soundfile
 import uvicorn
@@ -91,18 +91,19 @@
 )
 from voicevox_engine.utility.run_utility import decide_boolean_from_env
 
+# NOTE: Python 3.12以降で[S: StyleId | list[StyleId]]に置き換えられる
+S = TypeVar("S", StyleId, list[StyleId])
 
-def get_style_id_from_deprecated(
-    style_id: StyleId | None, speaker_id: StyleId | None
-) -> StyleId:
+
+def get_style_id_from_deprecated(style_id: S | None, deprecated_speaker: S | None) -> S:
     """
-    style_idとspeaker_id両方ともNoneかNoneでないかをチェックし、
+    style_idとspeaker両方ともNoneかNoneでないかをチェックし、
     どちらか片方しかNoneが存在しなければstyle_idを返す
     """
-    if speaker_id is not None and style_id is None:
+    if deprecated_speaker is not None and style_id is None:
         warnings.warn("speakerは非推奨です。style_idを利用してください。", stacklevel=1)
-        return speaker_id
-    elif style_id is not None and speaker_id is None:
+        return deprecated_speaker
+    elif style_id is not None and deprecated_speaker is None:
         return style_id
     raise HTTPException(
         status_code=400, detail="speakerとstyle_idが両方とも存在しないか、両方とも存在しています。"
@@ -289,7 +290,9 @@ def audio_query(
         """
         音声合成用のクエリの初期値を得ます。ここで得られたクエリはそのまま音声合成に利用できます。各値の意味は`Schemas`を参照してください。
         """
-        style_id = get_style_id_from_deprecated(style_id=style_id, speaker_id=speaker)
+        style_id = get_style_id_from_deprecated(
+            style_id=style_id, deprecated_speaker=speaker
+        )
         engine = get_engine(core_version)
         core = get_core(core_version)
         accent_phrases = engine.create_accent_phrases(text, style_id)
@@ -375,7 +378,9 @@ def accent_phrases(
         * アクセント位置を`'`で指定する。全てのアクセント句にはアクセント位置を1つ指定する必要がある。
         * アクセント句末に`？`(全角)を入れることにより疑問文の発音ができる。
         """
-        style_id = get_style_id_from_deprecated(style_id=style_id, speaker_id=speaker)
+        style_id = get_style_id_from_deprecated(
+            style_id=style_id, deprecated_speaker=speaker
+        )
         engine = get_engine(core_version)
         if is_kana:
             try:
@@ -403,7 +408,9 @@ def mora_data(
         speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
         core_version: str | None = None,
     ) -> list[AccentPhrase]:
-        style_id = get_style_id_from_deprecated(style_id=style_id, speaker_id=speaker)
+        style_id = get_style_id_from_deprecated(
+            style_id=style_id, deprecated_speaker=speaker
+        )
         engine = get_engine(core_version)
         return engine.update_length_and_pitch(accent_phrases, style_id)
 
@@ -419,7 +426,9 @@ def mora_length(
         speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
         core_version: str | None = None,
     ) -> list[AccentPhrase]:
-        style_id = get_style_id_from_deprecated(style_id=style_id, speaker_id=speaker)
+        style_id = get_style_id_from_deprecated(
+            style_id=style_id, deprecated_speaker=speaker
+        )
         engine = get_engine(core_version)
         return engine.update_length(accent_phrases, style_id)
 
@@ -435,7 +444,9 @@ def mora_pitch(
         speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
         core_version: str | None = None,
     ) -> list[AccentPhrase]:
-        style_id = get_style_id_from_deprecated(style_id=style_id, speaker_id=speaker)
+        style_id = get_style_id_from_deprecated(
+            style_id=style_id, deprecated_speaker=speaker
+        )
         engine = get_engine(core_version)
         return engine.update_pitch(accent_phrases, style_id)
 
@@ -462,7 +473,9 @@ def synthesis(
         ),
         core_version: str | None = None,
     ) -> FileResponse:
-        style_id = get_style_id_from_deprecated(style_id=style_id, speaker_id=speaker)
+        style_id = get_style_id_from_deprecated(
+            style_id=style_id, deprecated_speaker=speaker
+        )
         engine = get_engine(core_version)
         wave = engine.synthesize_wave(
             query, style_id, enable_interrogative_upspeak=enable_interrogative_upspeak
@@ -499,7 +512,9 @@ def cancellable_synthesis(
         speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
         core_version: str | None = None,
     ) -> FileResponse:
-        style_id = get_style_id_from_deprecated(style_id=style_id, speaker_id=speaker)
+        style_id = get_style_id_from_deprecated(
+            style_id=style_id, deprecated_speaker=speaker
+        )
         if cancellable_engine is None:
             raise HTTPException(
                 status_code=404,
@@ -538,7 +553,9 @@ def multi_synthesis(
         speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
         core_version: str | None = None,
     ) -> FileResponse:
-        style_id = get_style_id_from_deprecated(style_id=style_id, speaker_id=speaker)
+        style_id = get_style_id_from_deprecated(
+            style_id=style_id, deprecated_speaker=speaker
+        )
         engine = get_engine(core_version)
         sampling_rate = queries[0].outputSamplingRate
 
@@ -571,24 +588,28 @@ def multi_synthesis(
         "/morphable_targets",
         response_model=list[dict[str, MorphableTargetInfo]],
         tags=["音声合成"],
-        summary="指定した話者に対してエンジン内の話者がモーフィングが可能か判定する",
+        summary="指定したスタイルに対してエンジン内の話者がモーフィングが可能か判定する",
     )
     def morphable_targets(
-        base_speakers: list[int],  # FIXME: StyleId型にする
+        base_style_ids: list[StyleId] | None = Query(default=None),  # noqa: B008
+        base_speakers: list[StyleId] | None = Query(default=None),  # noqa: B008
         core_version: str | None = None,
     ) -> list[dict[str, MorphableTargetInfo]]:
         """
-        指定されたベース話者に対してエンジン内の各話者がモーフィング機能を利用可能か返します。
+        指定されたベーススタイルに対してエンジン内の各話者がモーフィング機能を利用可能か返します。
         モーフィングの許可/禁止は`/speakers`の`speaker.supported_features.synthesis_morphing`に記載されています。
         プロパティが存在しない場合は、モーフィングが許可されているとみなします。
         返り値の話者はstring型なので注意。
         """
+        base_style_ids = get_style_id_from_deprecated(
+            style_id=base_style_ids, deprecated_speaker=base_speakers
+        )
         core = get_core(core_version)
 
         try:
             speakers = metas_store.load_combined_metas(core=core)
             morphable_targets = get_morphable_targets(
-                speakers=speakers, base_speakers=base_speakers
+                speakers=speakers, base_style_ids=base_style_ids
             )
             # jsonはint型のキーを持てないので、string型に変換する
             return [
@@ -611,19 +632,31 @@ def morphable_targets(
             }
         },
         tags=["音声合成"],
-        summary="2人の話者でモーフィングした音声を合成する",
+        summary="2種類のスタイルでモーフィングした音声を合成する",
     )
     def _synthesis_morphing(
         query: AudioQuery,
-        base_speaker: int,  # FIXME: StyleId型にする
-        target_speaker: int,
+        base_style_id: StyleId | None = Query(default=None),  # noqa: B008
+        base_speaker: (StyleId | None) = Query(  # noqa: B008
+            default=None, deprecated=True
+        ),
+        target_style_id: StyleId | None = Query(default=None),  # noqa: B008
+        target_speaker: (StyleId | None) = Query(  # noqa: B008
+            default=None, deprecated=True
+        ),
         morph_rate: float = Query(..., ge=0.0, le=1.0),  # noqa: B008
         core_version: str | None = None,
     ) -> FileResponse:
         """
-        指定された2人の話者で音声を合成、指定した割合でモーフィングした音声を得ます。
-        モーフィングの割合は`morph_rate`で指定でき、0.0でベースの話者、1.0でターゲットの話者に近づきます。
+        指定された2種類のスタイルで音声を合成、指定した割合でモーフィングした音声を得ます。
+        モーフィングの割合は`morph_rate`で指定でき、0.0でベースのスタイル、1.0でターゲットのスタイルに近づきます。
         """
+        base_style_id = get_style_id_from_deprecated(
+            style_id=base_style_id, deprecated_speaker=base_speaker
+        )
+        target_style_id = get_style_id_from_deprecated(
+            style_id=target_style_id, deprecated_speaker=target_speaker
+        )
         engine = get_engine(core_version)
         core = get_core(core_version)
 
@@ -631,7 +664,7 @@ def _synthesis_morphing(
             speakers = metas_store.load_combined_metas(core=core)
             speaker_lookup = construct_lookup(speakers=speakers)
             is_permitted = is_synthesis_morphing_permitted(
-                speaker_lookup, base_speaker, target_speaker
+                speaker_lookup, base_style_id, target_style_id
             )
             if not is_permitted:
                 raise HTTPException(
@@ -648,8 +681,8 @@ def _synthesis_morphing(
             engine=engine,
             core=core,
             query=query,
-            base_speaker=base_speaker,
-            target_speaker=target_speaker,
+            base_style_id=base_style_id,
+            target_style_id=target_style_id,
         )
 
         morph_wave = synthesis_morphing(
@@ -1033,9 +1066,7 @@ def initialize_speaker(
         core_version: str | None = None,
     ) -> Response:
         """
-        こちらのAPIは非推奨です。`initialize_style_id`を利用してください。\n
-        指定されたspeaker_idの話者を初期化します。
-        実行しなくても他のAPIは使用できますが、初回実行時に時間がかかることがあります。
+        こちらのAPIは非推奨です。`initialize_style_id`を利用してください。
         """
         warnings.warn(
             "使用しているAPI(/initialize_speaker)は非推奨です。/initialized_style_idを利用してください。",
@@ -1053,8 +1084,7 @@ def is_initialized_speaker(
         core_version: str | None = None,
     ) -> bool:
         """
-        こちらのAPIは非推奨です。`is_initialize_style_id`を利用してください。\n
-        指定されたspeaker_idの話者が初期化されているかどうかを返します。
+        こちらのAPIは非推奨です。`is_initialize_style_id`を利用してください。
         """
         warnings.warn(
             "使用しているAPI(/is_initialize_speaker)は非推奨です。/is_initialized_style_idを利用してください。",
diff --git a/voicevox_engine/engine_manifest/EngineManifest.py b/voicevox_engine/engine_manifest/EngineManifest.py
index 4a8f05721..a203767aa 100644
--- a/voicevox_engine/engine_manifest/EngineManifest.py
+++ b/voicevox_engine/engine_manifest/EngineManifest.py
@@ -41,7 +41,7 @@ class SupportedFeatures(BaseModel):
     adjust_intonation_scale: bool = Field(title="全体の抑揚の調整")
     adjust_volume_scale: bool = Field(title="全体の音量の調整")
     interrogative_upspeak: bool = Field(title="疑問文の自動調整")
-    synthesis_morphing: bool = Field(title="2人の話者でモーフィングした音声を合成")
+    synthesis_morphing: bool = Field(title="2種類のスタイルでモーフィングした音声を合成")
     manage_library: Optional[bool] = Field(title="音声ライブラリのインストール・アンインストール")
 
 
diff --git a/voicevox_engine/metas/MetasStore.py b/voicevox_engine/metas/MetasStore.py
index bd24fc209..2608ff4ff 100644
--- a/voicevox_engine/metas/MetasStore.py
+++ b/voicevox_engine/metas/MetasStore.py
@@ -7,6 +7,7 @@
     EngineSpeaker,
     Speaker,
     SpeakerStyle,
+    StyleId,
 )
 
 if TYPE_CHECKING:
@@ -61,7 +62,7 @@ def load_combined_metas(self, core: "CoreAdapter") -> List[Speaker]:
 
 def construct_lookup(
     speakers: List[Speaker],
-) -> Dict[int, Tuple[Speaker, SpeakerStyle]]:
+) -> Dict[StyleId, Tuple[Speaker, SpeakerStyle]]:
     """
     スタイルID に話者メタ情報・スタイルメタ情報を紐付ける対応表を生成
     Parameters
@@ -70,10 +71,10 @@ def construct_lookup(
         話者メタ情報
     Returns
     -------
-    ret : Dict[int, Tuple[Speaker, SpeakerStyle]]
+    ret : Dict[StyleId, Tuple[Speaker, SpeakerStyle]]
         スタイルID に話者メタ情報・スタイルメタ情報が紐付いた対応表
     """
-    lookup_table: dict[int, tuple[Speaker, SpeakerStyle]] = dict()
+    lookup_table: dict[StyleId, tuple[Speaker, SpeakerStyle]] = dict()
     for speaker in speakers:
         for style in speaker.styles:
             lookup_table[style.id] = (speaker, style)
diff --git a/voicevox_engine/morphing.py b/voicevox_engine/morphing.py
index 90b97fa63..f94fbb061 100644
--- a/voicevox_engine/morphing.py
+++ b/voicevox_engine/morphing.py
@@ -56,23 +56,23 @@ def create_morphing_parameter(
 
 def get_morphable_targets(
     speakers: List[Speaker],
-    base_speakers: List[int],
+    base_style_ids: List[StyleId],
 ) -> List[Dict[StyleId, MorphableTargetInfo]]:
     """
     speakers: 全話者の情報
-    base_speakers: モーフィング可能か判定したいベースの話者リスト（スタイルID）
+    base_speakers: モーフィング可能か判定したいベースのスタイルIDリスト
     """
     speaker_lookup = construct_lookup(speakers)
 
     morphable_targets_arr = []
-    for base_speaker in base_speakers:
+    for base_style_id in base_style_ids:
         morphable_targets = dict()
         for style in chain.from_iterable(speaker.styles for speaker in speakers):
             morphable_targets[style.id] = MorphableTargetInfo(
                 is_morphable=is_synthesis_morphing_permitted(
                     speaker_lookup=speaker_lookup,
-                    base_speaker=base_speaker,
-                    target_speaker=style.id,
+                    base_style_id=base_style_id,
+                    target_style_id=style.id,
                 )
             )
         morphable_targets_arr.append(morphable_targets)
@@ -81,21 +81,21 @@ def get_morphable_targets(
 
 
 def is_synthesis_morphing_permitted(
-    speaker_lookup: Dict[int, Tuple[Speaker, SpeakerStyle]],
-    base_speaker: int,
-    target_speaker: int,
+    speaker_lookup: Dict[StyleId, Tuple[Speaker, SpeakerStyle]],
+    base_style_id: StyleId,
+    target_style_id: StyleId,
 ) -> bool:
     """
     指定されたstyle_idがモーフィング可能かどうか返す
     style_idが見つからない場合はStyleIdNotFoundErrorを送出する
     """
 
-    base_speaker_data = speaker_lookup[base_speaker]
-    target_speaker_data = speaker_lookup[target_speaker]
+    base_speaker_data = speaker_lookup[base_style_id]
+    target_speaker_data = speaker_lookup[target_style_id]
 
     if base_speaker_data is None or target_speaker_data is None:
         raise StyleIdNotFoundError(
-            base_speaker if base_speaker_data is None else target_speaker
+            base_style_id if base_speaker_data is None else target_style_id
         )
 
     base_speaker_info, _ = base_speaker_data
@@ -137,8 +137,8 @@ def synthesis_morphing_parameter(
     engine: TTSEngine,
     core: CoreAdapter,
     query: AudioQuery,
-    base_speaker: StyleId,
-    target_speaker: StyleId,
+    base_style_id: StyleId,
+    target_style_id: StyleId,
 ) -> MorphingParameter:
     query = deepcopy(query)
 
@@ -148,8 +148,8 @@ def synthesis_morphing_parameter(
     # WORLDに掛けるため合成はモノラルで行う
     query.outputStereo = False
 
-    base_wave = engine.synthesize_wave(query, style_id=base_speaker).astype("float")
-    target_wave = engine.synthesize_wave(query, style_id=target_speaker).astype("float")
+    base_wave = engine.synthesize_wave(query, base_style_id).astype("float")
+    target_wave = engine.synthesize_wave(query, target_style_id).astype("float")
 
     return create_morphing_parameter(
         base_wave=base_wave,
@@ -174,7 +174,7 @@ def synthesis_morphing(
 
     morph_rate : float
         モーフィングの割合
-        0.0でベースの話者、1.0でターゲットの話者に近づきます。
+        0.0でベースの音声、1.0でターゲットの音声に近づきます。
 
     Returns
     -------

From ed01d2ebcad4a1d689c488025d3ee3b7e548c1ae Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sun, 7 Jan 2024 15:47:59 +0900
Subject: [PATCH 116/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`replace=5Fmora?=
 =?UTF-8?q?=5Fpitch`=20(#974)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: `replace_mora_pitch`

* refactor: "Vowel" リネーム

* fix: lint

* fix: 改行リバート

* fix: 意図の明示

* fix: 更新
---
 test/test_tts_engine.py                    | 10 ++--
 voicevox_engine/tts_pipeline/tts_engine.py | 68 ++++++++--------------
 2 files changed, 30 insertions(+), 48 deletions(-)

diff --git a/test/test_tts_engine.py b/test/test_tts_engine.py
index 9a663fb80..08ef91aed 100644
--- a/test/test_tts_engine.py
+++ b/test/test_tts_engine.py
@@ -23,7 +23,7 @@
     split_mora,
     to_flatten_moras,
     to_flatten_phonemes,
-    unvoiced_mora_phoneme_list,
+    unvoiced_vowel_likes,
 )
 
 TRUE_NUM_PHONEME = 45
@@ -655,11 +655,11 @@ def test_update_pitch(self):
         index = 1
 
         def result_value(i: int) -> float:
-            # unvoiced_mora_phoneme_listのPhoneme ID版
-            unvoiced_mora_phoneme_id_list = [
-                Phoneme(p).phoneme_id for p in unvoiced_mora_phoneme_list
+            # unvoiced_vowel_likesのPhoneme ID版
+            unvoiced_vowel_like_ids = [
+                Phoneme(p).phoneme_id for p in unvoiced_vowel_likes
             ]
-            if vowel_phoneme_list[i] in unvoiced_mora_phoneme_id_list:
+            if vowel_phoneme_list[i] in unvoiced_vowel_like_ids:
                 return 0
             return round(
                 (
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index 575668f51..b2367d628 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -13,8 +13,8 @@
 from .mora_list import mora_phonemes_to_mora_kana
 from .text_analyzer import text_to_accent_phrases
 
-unvoiced_mora_phoneme_list = ["A", "I", "U", "E", "O", "cl", "pau"]
-mora_phoneme_list = ["a", "i", "u", "e", "o", "N"] + unvoiced_mora_phoneme_list
+unvoiced_vowel_likes = ["A", "I", "U", "E", "O", "cl", "pau"]
+mora_phoneme_list = ["a", "i", "u", "e", "o", "N"] + unvoiced_vowel_likes
 
 # 疑問文語尾定数
 UPSPEAK_LENGTH = 0.15
@@ -286,14 +286,10 @@ def update_pitch(
         self, accent_phrases: list[AccentPhrase], style_id: StyleId
     ) -> list[AccentPhrase]:
         """アクセント句系列に含まれるモーラの音高属性をスタイルに合わせて更新する"""
-        # numpy.concatenateが空リストだとエラーを返すのでチェック
+        # 後続のnumpy.concatenateが空リストだとエラーになるので別処理
         if len(accent_phrases) == 0:
             return []
 
-        # phoneme
-        # AccentPhraseをすべてMoraおよびPhonemeの形に分解し、処理可能な形にする
-        flatten_moras, phoneme_data_list = pre_process(accent_phrases)
-
         # accent
         def _create_one_hot(accent_phrase: AccentPhrase, position: int) -> ndarray:
             """
@@ -320,7 +316,7 @@ def _create_one_hot(accent_phrase: AccentPhrase, position: int) -> ndarray:
                 (0 if accent_phrase.pause_mora is not None else []),
             ]
 
-        # accent_phrasesから、アクセントの開始位置のリストを作る
+        # アクセントの開始/終了位置リストを作る
         start_accent_list = numpy.concatenate(
             [
                 # accentはプログラミング言語におけるindexのように0始まりではなく1始まりなので、
@@ -330,8 +326,6 @@ def _create_one_hot(accent_phrase: AccentPhrase, position: int) -> ndarray:
                 for accent_phrase in accent_phrases
             ]
         )
-
-        # accent_phrasesから、アクセントの終了位置のリストを作る
         end_accent_list = numpy.concatenate(
             [
                 # accentはプログラミング言語におけるindexのように0始まりではなく1始まりなので、1を引いている
@@ -340,24 +334,21 @@ def _create_one_hot(accent_phrase: AccentPhrase, position: int) -> ndarray:
             ]
         )
 
-        # accent_phrasesから、アクセント句の開始位置のリストを作る
-        # これによって、yukarin_sa_forwarder内でアクセント句を区別できる
+        # アクセント句の開始/終了位置リストを作る
         start_accent_phrase_list = numpy.concatenate(
             [_create_one_hot(accent_phrase, 0) for accent_phrase in accent_phrases]
         )
-
-        # accent_phrasesから、アクセント句の終了位置のリストを作る
         end_accent_phrase_list = numpy.concatenate(
             [_create_one_hot(accent_phrase, -1) for accent_phrase in accent_phrases]
         )
 
-        # 最初と最後に0を付け加える。これによってpau(前後の無音のためのもの)を付け加えたことになる
+        # 前後無音を付加する
         start_accent_list = numpy.r_[0, start_accent_list, 0]
         end_accent_list = numpy.r_[0, end_accent_list, 0]
         start_accent_phrase_list = numpy.r_[0, start_accent_phrase_list, 0]
         end_accent_phrase_list = numpy.r_[0, end_accent_phrase_list, 0]
 
-        # アクセント・アクセント句関連のデータをyukarin_sa_forwarderに渡すための最終処理、リスト内のデータをint64に変換する
+        # キャスト
         start_accent_list = numpy.array(start_accent_list, dtype=numpy.int64)
         end_accent_list = numpy.array(end_accent_list, dtype=numpy.int64)
         start_accent_phrase_list = numpy.array(
@@ -365,28 +356,20 @@ def _create_one_hot(accent_phrase: AccentPhrase, position: int) -> ndarray:
         )
         end_accent_phrase_list = numpy.array(end_accent_phrase_list, dtype=numpy.int64)
 
-        # phonemeに関するデータを取得(変換)する
-        (consonant_phoneme_data_list, vowel_phoneme_data_list) = split_mora(
-            phoneme_data_list
-        )
+        # アクセント句系列から（前後の無音含まない）モーラ系列と（前後の無音含む）音素系列を抽出する
+        moras, phonemes = pre_process(accent_phrases)
 
-        # yukarin_sa
-        # Phoneme関連のデータをyukarin_sa_forwarderに渡すための最終処理、リスト内のデータをint64に変換する
-        vowel_phoneme_list = numpy.array(
-            [p.phoneme_id for p in vowel_phoneme_data_list], dtype=numpy.int64
-        )
-        consonant_phoneme_list = numpy.array(
-            [
-                p.phoneme_id if p is not None else -1
-                for p in consonant_phoneme_data_list
-            ],
-            dtype=numpy.int64,
+        # 前後無音付加済みの音素系列から子音ID系列・母音ID系列を抽出する
+        consonants, vowels = split_mora(phonemes)
+        vowel_ids = numpy.array([p.phoneme_id for p in vowels], dtype=numpy.int64)
+        consonant_ids = numpy.array(
+            [p.phoneme_id if p else -1 for p in consonants], dtype=numpy.int64
         )
 
-        # 今までに生成された情報をyukarin_sa_forwardにかけ、推論器によってモーラごとに適切な音高(ピッチ)を割り当てる
-        f0_list = self._core.safe_yukarin_sa_forward(
-            vowel_phoneme_list,
-            consonant_phoneme_list,
+        # コアを用いてモーラ音高を生成する
+        f0 = self._core.safe_yukarin_sa_forward(
+            vowel_ids,
+            consonant_ids,
             start_accent_list,
             end_accent_list,
             start_accent_phrase_list,
@@ -394,15 +377,14 @@ def _create_one_hot(accent_phrase: AccentPhrase, position: int) -> ndarray:
             style_id,
         )
 
-        # 無声母音を含むMoraに関しては、音高(ピッチ)を0にする
-        for i, p in enumerate(vowel_phoneme_data_list):
-            if p.phoneme in unvoiced_mora_phoneme_list:
-                f0_list[i] = 0
+        # 母音が無声であるモーラは音高を 0 とする
+        for i, p in enumerate(vowels):
+            if p.phoneme in unvoiced_vowel_likes:
+                f0[i] = 0
 
-        # yukarin_sa_forwarderの結果をaccent_phrasesに反映する
-        # flatten_moras変数に展開された値を変更することでコード量を削減しつつaccent_phrases内のデータを書き換えている
-        for i, mora in enumerate(flatten_moras):
-            mora.pitch = f0_list[i + 1]
+        # 更新する
+        for i, mora in enumerate(moras):
+            mora.pitch = f0[i + 1]
 
         return accent_phrases
 

From 79ea8bcbc273a976c51ae10b7441de2c2160e0e2 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sun, 7 Jan 2024 17:18:59 +0900
Subject: [PATCH 117/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`split=5Fmora`=20?=
 =?UTF-8?q?(#984)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: `split_mora`

* fix: コメント

* refactor: `split_mora` テスト

* refactor: `split_mora` テスト集約

* fix: test runner

* Update voicevox_engine/tts_pipeline/tts_engine.py

---------

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 test/test_tts_engine.py                    | 94 +++++++++-------------
 voicevox_engine/tts_pipeline/tts_engine.py | 29 +++----
 2 files changed, 49 insertions(+), 74 deletions(-)

diff --git a/test/test_tts_engine.py b/test/test_tts_engine.py
index 08ef91aed..fdb618015 100644
--- a/test/test_tts_engine.py
+++ b/test/test_tts_engine.py
@@ -34,27 +34,6 @@ def is_same_phoneme(p1: Phoneme, p2: Phoneme) -> bool:
     return p1.phoneme == p2.phoneme
 
 
-def is_same_ojt_phoneme_list(
-    p1s: list[Phoneme | None] | list[Phoneme], p2s: list[Phoneme | None] | list[Phoneme]
-) -> bool:
-    """2つのPhonemeリストで全要素ペアが同じ `.phoneme` を持つ"""
-    if len(p1s) != len(p2s):
-        return False
-
-    for p1, p2 in zip(p1s, p2s):
-        if p1 is None and p2 is None:  # None vs None -> equal
-            pass
-        elif p1 is None:  # None vs OjtOhoneme -> not equal
-            return False
-        elif p2 is None:  # OjtOhoneme vs None -> not equal
-            return False
-        elif is_same_phoneme(p1, p2):
-            pass
-        else:
-            return False
-    return True
-
-
 def yukarin_s_mock(
     length: int, phoneme_list: numpy.ndarray, style_id: numpy.ndarray
 ) -> numpy.ndarray:
@@ -461,11 +440,6 @@ def test_raw_wave_to_output_wave_without_resample():
     assert numpy.allclose(wave, true_wave)
 
 
-def _gen_hello_hiho_phonemes() -> list[Phoneme]:
-    hello_hiho = "sil k o N n i ch i w a pau h i h o d e s U sil"
-    return [Phoneme(p) for p in hello_hiho.split()]
-
-
 def _gen_hello_hiho_accent_phrases() -> list[AccentPhrase]:
     return [
         AccentPhrase(
@@ -492,6 +466,43 @@ def _gen_hello_hiho_accent_phrases() -> list[AccentPhrase]:
     ]
 
 
+def is_same_phonemes(
+    p1s: list[Phoneme] | list[Phoneme | None], p2s: list[Phoneme] | list[Phoneme | None]
+) -> bool:
+    """2つのPhonemeリストで全要素ペアが同じ `.phoneme` を持つ"""
+    if len(p1s) != len(p2s):
+        return False
+
+    for p1, p2 in zip(p1s, p2s):
+        if p1 is None and p2 is None:  # None vs None -> equal
+            pass
+        elif p1 is None:  # None vs OjtOhoneme -> not equal
+            return False
+        elif p2 is None:  # OjtOhoneme vs None -> not equal
+            return False
+        elif is_same_phoneme(p1, p2):
+            pass
+        else:
+            return False
+    return True
+
+
+def test_split_mora():
+    # Inputs
+    hello_hiho = "sil k o N n i ch i w a pau h i h o d e s U sil"
+    hello_hiho_phonemes = [Phoneme(p) for p in hello_hiho.split()]
+    # Outputs
+    consonants, vowels = split_mora(hello_hiho_phonemes)
+    # Expects
+    cs = [None, "k", None, "n", "ch", "w", None, "h", "h", "d", "s", None]
+    vs = ["pau", "o", "N", "i", "i", "a", "pau", "i", "o", "e", "U", "pau"]
+    true_consonants = [Phoneme(p) if p else None for p in cs]
+    true_vowels = [Phoneme(p) for p in vs]
+    # Tests
+    assert is_same_phonemes(vowels, true_vowels)
+    assert is_same_phonemes(consonants, true_consonants)
+
+
 class TestTTSEngine(TestCase):
     def setUp(self):
         super().setUp()
@@ -511,37 +522,6 @@ def test_to_flatten_moras(self):
             + true_accent_phrases_hello_hiho[1].moras,
         )
 
-    def test_split_mora(self):
-        # Outputs
-        consonant_phoneme_list, vowel_phoneme_list = split_mora(
-            _gen_hello_hiho_phonemes()
-        )
-
-        ps = ["pau", "o", "N", "i", "i", "a", "pau", "i", "o", "e", "U", "pau"]
-        true_vowel_phoneme_list = [Phoneme(p) for p in ps]
-        self.assertTrue(
-            is_same_ojt_phoneme_list(vowel_phoneme_list, true_vowel_phoneme_list)
-        )
-        self.assertTrue(
-            is_same_ojt_phoneme_list(
-                consonant_phoneme_list,
-                [
-                    None,
-                    Phoneme("k"),
-                    None,
-                    Phoneme("n"),
-                    Phoneme("ch"),
-                    Phoneme("w"),
-                    None,
-                    Phoneme("h"),
-                    Phoneme("h"),
-                    Phoneme("d"),
-                    Phoneme("s"),
-                    None,
-                ],
-            )
-        )
-
     def test_pre_process(self):
         flatten_moras, phoneme_data_list = pre_process(_gen_hello_hiho_accent_phrases())
 
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index b2367d628..814071920 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -53,24 +53,19 @@ def to_flatten_phonemes(moras: list[Mora]) -> list[Phoneme]:
     return phonemes
 
 
-def split_mora(
-    phoneme_list: list[Phoneme],
-) -> tuple[list[Phoneme | None], list[Phoneme]]:
+def split_mora(phonemes: list[Phoneme]) -> tuple[list[Phoneme | None], list[Phoneme]]:
     """音素系列から子音系列と母音系列を抽出する"""
-    vowel_indexes = [
-        i for i, p in enumerate(phoneme_list) if p.phoneme in mora_phoneme_list
-    ]
-    vowel_phoneme_list = [phoneme_list[i] for i in vowel_indexes]
-    # postとprevのvowel_indexの差として考えられる値は1か2
-    # 理由としてはphoneme_listは、consonant、vowelの組み合わせか、vowel一つの連続であるから
-    # 1の場合はconsonant(子音)が存在しない=母音のみ(a/i/u/e/o/N/cl/pau)で構成されるモーラ(音)である
-    # 2の場合はconsonantが存在するモーラである
-    # なので、2の場合(else)でphonemeを取り出している
-    consonant_phoneme_list = [None] + [
-        None if post - prev == 1 else phoneme_list[post - 1]
-        for prev, post in zip(vowel_indexes[:-1], vowel_indexes[1:])
-    ]
-    return consonant_phoneme_list, vowel_phoneme_list
+    consonants: list[Phoneme | None] = []
+    vowels: list[Phoneme] = []
+    for i, p in enumerate(phonemes):
+        if p.phoneme in mora_phoneme_list:
+            vowels += [p]
+            # Vowel のみのモーラの場合（Vowel が連続する場合）、Consonant を None とする
+            if i == 0 or phonemes[i - 1].phoneme in mora_phoneme_list:
+                consonants += [None]
+        else:
+            consonants += [p]
+    return consonants, vowels
 
 
 def pre_process(

From 34a59401974ef810f432e83e6fd177d26ed2a08c Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sun, 7 Jan 2024 20:35:55 +0900
Subject: [PATCH 118/177] =?UTF-8?q?=E8=BF=BD=E5=8A=A0:=20`update=5Flength(?=
 =?UTF-8?q?)`=20=E3=82=B9=E3=83=8A=E3=83=83=E3=83=97=E3=82=B7=E3=83=A7?=
 =?UTF-8?q?=E3=83=83=E3=83=88=20(#978)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add: `update_length()` スナップショット

* refactor: 入力 util

* add: MockCoreWrapper 出力

* fix: import

* fix: comments

* 変えてみました

* fix: lint

---------

Co-authored-by: Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp>
---
 .../test_mocked_update_length_output.json     | 95 +++++++++++++++++++
 test/conftest.py                              | 16 ++++
 test/e2e/conftest.py                          | 15 ---
 test/test_tts_engine.py                       | 14 +++
 4 files changed, 125 insertions(+), 15 deletions(-)
 create mode 100644 test/__snapshots__/test_tts_engine/test_mocked_update_length_output.json
 create mode 100644 test/conftest.py

diff --git a/test/__snapshots__/test_tts_engine/test_mocked_update_length_output.json b/test/__snapshots__/test_tts_engine/test_mocked_update_length_output.json
new file mode 100644
index 000000000..c50c36191
--- /dev/null
+++ b/test/__snapshots__/test_tts_engine/test_mocked_update_length_output.json
@@ -0,0 +1,95 @@
+[
+  {
+    "accent": 5,
+    "is_interrogative": false,
+    "moras": [
+      {
+        "consonant": "k",
+        "consonant_length": 2.44,
+        "pitch": 0.0,
+        "text": "コ",
+        "vowel": "o",
+        "vowel_length": 2.88
+      },
+      {
+        "consonant": null,
+        "consonant_length": null,
+        "pitch": 0.0,
+        "text": "ン",
+        "vowel": "N",
+        "vowel_length": 1.25
+      },
+      {
+        "consonant": "n",
+        "consonant_length": 2.75,
+        "pitch": 0.0,
+        "text": "ニ",
+        "vowel": "i",
+        "vowel_length": 2.31
+      },
+      {
+        "consonant": "ch",
+        "consonant_length": 1.62,
+        "pitch": 0.0,
+        "text": "チ",
+        "vowel": "i",
+        "vowel_length": 2.31
+      },
+      {
+        "consonant": "w",
+        "consonant_length": 3.62,
+        "pitch": 0.0,
+        "text": "ワ",
+        "vowel": "a",
+        "vowel_length": 1.44
+      }
+    ],
+    "pause_mora": {
+      "consonant": null,
+      "consonant_length": null,
+      "pitch": 0.0,
+      "text": "、",
+      "vowel": "pau",
+      "vowel_length": 1.0
+    }
+  },
+  {
+    "accent": 1,
+    "is_interrogative": false,
+    "moras": [
+      {
+        "consonant": "h",
+        "consonant_length": 2.19,
+        "pitch": 0.0,
+        "text": "ヒ",
+        "vowel": "i",
+        "vowel_length": 2.31
+      },
+      {
+        "consonant": "h",
+        "consonant_length": 2.19,
+        "pitch": 0.0,
+        "text": "ホ",
+        "vowel": "o",
+        "vowel_length": 2.88
+      },
+      {
+        "consonant": "d",
+        "consonant_length": 1.75,
+        "pitch": 0.0,
+        "text": "デ",
+        "vowel": "e",
+        "vowel_length": 1.88
+      },
+      {
+        "consonant": "s",
+        "consonant_length": 3.19,
+        "pitch": 0.0,
+        "text": "ス",
+        "vowel": "U",
+        "vowel_length": 1.38
+      }
+    ],
+    "pause_mora": null
+  }
+]
diff --git a/test/conftest.py b/test/conftest.py
new file mode 100644
index 000000000..dd7920d24
--- /dev/null
+++ b/test/conftest.py
@@ -0,0 +1,16 @@
+import pytest
+from syrupy.assertion import SnapshotAssertion
+from syrupy.extensions.json import JSONSnapshotExtension
+
+
+@pytest.fixture
+def snapshot_json(snapshot: SnapshotAssertion) -> SnapshotAssertion:
+    """
+    syrupyでJSONをsnapshotするためのfixture。
+
+    Examples
+    --------
+    >>> def test_foo(snapshot_json: JSONSnapshotExtension):
+    >>>     assert snapshot_json == {"key": "value"}
+    """
+    return snapshot.use_extension(JSONSnapshotExtension)
diff --git a/test/e2e/conftest.py b/test/e2e/conftest.py
index 6f7fda132..50d593f8c 100644
--- a/test/e2e/conftest.py
+++ b/test/e2e/conftest.py
@@ -3,8 +3,6 @@
 import pytest
 from fastapi.testclient import TestClient
 from run import generate_app
-from syrupy.assertion import SnapshotAssertion
-from syrupy.extensions.json import JSONSnapshotExtension
 
 from voicevox_engine.core_initializer import initialize_cores
 from voicevox_engine.preset import PresetManager
@@ -13,19 +11,6 @@
 from voicevox_engine.utility.core_version_utility import get_latest_core_version
 
 
-@pytest.fixture
-def snapshot_json(snapshot: SnapshotAssertion) -> SnapshotAssertion:
-    """
-    syrupyでJSONをsnapshotするためのfixture。
-
-    Examples
-    --------
-    >>> def test_foo(snapshot_json: JSONSnapshotExtension):
-    >>>     assert snapshot_json == {"key": "value"}
-    """
-    return snapshot.use_extension(JSONSnapshotExtension)
-
-
 @pytest.fixture(scope="session")
 def app_params():
     cores = initialize_cores(use_gpu=False, enable_mock=True)
diff --git a/test/test_tts_engine.py b/test/test_tts_engine.py
index fdb618015..351f807a1 100644
--- a/test/test_tts_engine.py
+++ b/test/test_tts_engine.py
@@ -1,9 +1,13 @@
+import json
 from typing import Union
 from unittest import TestCase
 from unittest.mock import Mock
 
 import numpy
+from pydantic.json import pydantic_encoder
+from syrupy.extensions.json import JSONSnapshotExtension
 
+from voicevox_engine.dev.core.mock import MockCoreWrapper
 from voicevox_engine.metas.Metas import StyleId
 from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
 from voicevox_engine.tts_pipeline import TTSEngine
@@ -681,3 +685,13 @@ def result_value(i: int) -> float:
         numpy.testing.assert_array_equal(start_accent_phrase_list, true_phrase_starts)
         numpy.testing.assert_array_equal(end_accent_phrase_list, true_phrase_ends)
         self.assertEqual(result, true_result)
+
+
+def test_mocked_update_length_output(snapshot_json: JSONSnapshotExtension) -> None:
+    # Inputs
+    tts_engine = TTSEngine(MockCoreWrapper())
+    hello_hiho = _gen_hello_hiho_accent_phrases()
+    # Outputs
+    result = tts_engine.update_length(hello_hiho, StyleId(1))
+    # Tests
+    assert snapshot_json == json.loads(json.dumps(result, default=pydantic_encoder))

From fafee013e2d5ceef100bcec8cd3062257711a27a Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sun, 7 Jan 2024 21:26:25 +0900
Subject: [PATCH 119/177] =?UTF-8?q?=E8=BF=BD=E5=8A=A0:=20`create=5Faccent?=
 =?UTF-8?q?=5Fphrases()`=20=E7=95=B0=E5=B8=B8=E7=B3=BB=E3=83=86=E3=82=B9?=
 =?UTF-8?q?=E3=83=88=20(#982)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add: `create_accent_phrases()` 異常系テスト

* fix: lint

---------

Co-authored-by: Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp>
---
 test/test_tts_engine.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/test/test_tts_engine.py b/test/test_tts_engine.py
index 351f807a1..5dc7e7275 100644
--- a/test/test_tts_engine.py
+++ b/test/test_tts_engine.py
@@ -4,6 +4,7 @@
 from unittest.mock import Mock
 
 import numpy
+import pytest
 from pydantic.json import pydantic_encoder
 from syrupy.extensions.json import JSONSnapshotExtension
 
@@ -12,6 +13,7 @@
 from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
 from voicevox_engine.tts_pipeline import TTSEngine
 from voicevox_engine.tts_pipeline.acoustic_feature_extractor import Phoneme
+from voicevox_engine.tts_pipeline.text_analyzer import text_to_accent_phrases
 from voicevox_engine.tts_pipeline.tts_engine import (
     apply_intonation_scale,
     apply_output_sampling_rate,
@@ -30,6 +32,8 @@
     unvoiced_vowel_likes,
 )
 
+from .test_text_analyzer import stub_unknown_features_koxx
+
 TRUE_NUM_PHONEME = 45
 
 
@@ -687,6 +691,19 @@ def result_value(i: int) -> float:
         self.assertEqual(result, true_result)
 
 
+def test_create_accent_phrases_toward_unknown():
+    """`TTSEngine.create_accent_phrases()` は unknown 音素の Phoneme 化に失敗する"""
+    engine = TTSEngine(MockCoreWrapper())
+
+    # NOTE: TTSEngine.create_accent_phrases() のコールで unknown feature を得ることが難しいため、疑似再現
+    accent_phrases = text_to_accent_phrases(
+        "dummy", text_to_features=stub_unknown_features_koxx
+    )
+    with pytest.raises(ValueError) as e:
+        accent_phrases = engine.update_length_and_pitch(accent_phrases, StyleId(0))
+    assert str(e.value) == "tuple.index(x): x not in tuple"
+
+
 def test_mocked_update_length_output(snapshot_json: JSONSnapshotExtension) -> None:
     # Inputs
     tts_engine = TTSEngine(MockCoreWrapper())

From c25c87e6cc88af3fe5ed2f26c2aeaa7edb0c1e28 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sun, 7 Jan 2024 21:37:32 +0900
Subject: [PATCH 120/177] =?UTF-8?q?=E8=BF=BD=E5=8A=A0:=20`TTSEngine.create?=
 =?UTF-8?q?=5Faccent=5Fphrases=5Ffrom=5Fkana()`=20(#983)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

add: `TTSEngine.create_accent_phrases_from_kana()`
---
 run.py                                     | 8 ++------
 voicevox_engine/tts_pipeline/tts_engine.py | 9 +++++++++
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/run.py b/run.py
index 39e401681..b4d851738 100644
--- a/run.py
+++ b/run.py
@@ -384,15 +384,11 @@ def accent_phrases(
         engine = get_engine(core_version)
         if is_kana:
             try:
-                accent_phrases = parse_kana(text)
+                return engine.create_accent_phrases_from_kana(text, style_id)
             except ParseKanaError as err:
                 raise HTTPException(
-                    status_code=400,
-                    detail=ParseKanaBadRequest(err).dict(),
+                    status_code=400, detail=ParseKanaBadRequest(err).dict()
                 )
-            accent_phrases = engine.update_length_and_pitch(accent_phrases, style_id)
-
-            return accent_phrases
         else:
             return engine.create_accent_phrases(text, style_id)
 
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index 814071920..1e12eddd6 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -10,6 +10,7 @@
 from ..metas.Metas import StyleId
 from ..model import AccentPhrase, AudioQuery, Mora
 from .acoustic_feature_extractor import Phoneme
+from .kana_converter import parse_kana
 from .mora_list import mora_phonemes_to_mora_kana
 from .text_analyzer import text_to_accent_phrases
 
@@ -397,6 +398,14 @@ def create_accent_phrases(self, text: str, style_id: StyleId) -> list[AccentPhra
         accent_phrases = self.update_length_and_pitch(accent_phrases, style_id)
         return accent_phrases
 
+    def create_accent_phrases_from_kana(
+        self, kana: str, style_id: StyleId
+    ) -> list[AccentPhrase]:
+        """AquesTalk 風記法テキストからアクセント句系列を生成し、スタイルIDに基づいてその音素長・モーラ音高を更新する"""
+        accent_phrases = parse_kana(kana)
+        accent_phrases = self.update_length_and_pitch(accent_phrases, style_id)
+        return accent_phrases
+
     def synthesize_wave(
         self,
         query: AudioQuery,

From 281f5b3bbdd6e586540186ae3299f84019ef4c62 Mon Sep 17 00:00:00 2001
From: sabonerune <102559104+sabonerune@users.noreply.github.com>
Date: Mon, 8 Jan 2024 02:35:57 +0900
Subject: [PATCH 121/177] =?UTF-8?q?TYP:=20numpy=E3=81=AE=E5=9E=8B=E6=83=85?=
 =?UTF-8?q?=E5=A0=B1=E3=82=92=E8=BF=BD=E5=8A=A0=20(#988)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* TYP: numpyの型情報を追加

* FIX: mock修正

* npt.NDArray -> NDArray

* integer -> int64

* np.floatingをいくつか排除

* Apply suggestions from code review

* 要らなくなったはずのgenericsの撤去

* lintミス

---------

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 voicevox_engine/core_adapter.py               | 49 +++++-----
 voicevox_engine/core_wrapper.py               | 62 ++++++------
 voicevox_engine/dev/core/mock.py              | 17 ++--
 voicevox_engine/dev/tts_engine/mock.py        | 16 +--
 voicevox_engine/morphing.py                   | 27 +++---
 .../acoustic_feature_extractor.py             |  7 +-
 voicevox_engine/tts_pipeline/tts_engine.py    | 97 ++++++++++---------
 .../utility/connect_base64_waves.py           |  8 +-
 8 files changed, 150 insertions(+), 133 deletions(-)

diff --git a/voicevox_engine/core_adapter.py b/voicevox_engine/core_adapter.py
index 1d9bc1ed6..9197e3876 100644
--- a/voicevox_engine/core_adapter.py
+++ b/voicevox_engine/core_adapter.py
@@ -1,7 +1,7 @@
 import threading
 
-import numpy
-from numpy import ndarray
+import numpy as np
+from numpy.typing import NDArray
 
 from .core_wrapper import CoreWrapper, OldCoreError
 from .metas.Metas import StyleId
@@ -67,55 +67,58 @@ def is_initialized_style_id_synthesis(self, style_id: StyleId) -> bool:
             return True  # コアが古い場合はどうしようもないのでTrueを返す
 
     def safe_yukarin_s_forward(
-        self, phoneme_list_s: ndarray, style_id: StyleId
-    ) -> ndarray:
+        self, phoneme_list_s: NDArray[np.int64], style_id: StyleId
+    ) -> NDArray[np.float32]:
         # 「指定スタイルを初期化」「mutexによる安全性」「系列長・データ型に関するアダプター」を提供する
         self.initialize_style_id_synthesis(style_id, skip_reinit=True)
         with self.mutex:
             phoneme_length = self.core.yukarin_s_forward(
                 length=len(phoneme_list_s),
                 phoneme_list=phoneme_list_s,
-                style_id=numpy.array(style_id, dtype=numpy.int64).reshape(-1),
+                style_id=np.array(style_id, dtype=np.int64).reshape(-1),
             )
         return phoneme_length
 
     def safe_yukarin_sa_forward(
         self,
-        vowel_phoneme_list: ndarray,
-        consonant_phoneme_list: ndarray,
-        start_accent_list: ndarray,
-        end_accent_list: ndarray,
-        start_accent_phrase_list: ndarray,
-        end_accent_phrase_list: ndarray,
+        vowel_phoneme_list: NDArray[np.int64],
+        consonant_phoneme_list: NDArray[np.int64],
+        start_accent_list: NDArray[np.int64],
+        end_accent_list: NDArray[np.int64],
+        start_accent_phrase_list: NDArray[np.int64],
+        end_accent_phrase_list: NDArray[np.int64],
         style_id: StyleId,
-    ) -> ndarray:
+    ) -> NDArray[np.float32]:
         # 「指定スタイルを初期化」「mutexによる安全性」「系列長・データ型に関するアダプター」を提供する
         self.initialize_style_id_synthesis(style_id, skip_reinit=True)
         with self.mutex:
             f0_list = self.core.yukarin_sa_forward(
                 length=vowel_phoneme_list.shape[0],
-                vowel_phoneme_list=vowel_phoneme_list[numpy.newaxis],
-                consonant_phoneme_list=consonant_phoneme_list[numpy.newaxis],
-                start_accent_list=start_accent_list[numpy.newaxis],
-                end_accent_list=end_accent_list[numpy.newaxis],
-                start_accent_phrase_list=start_accent_phrase_list[numpy.newaxis],
-                end_accent_phrase_list=end_accent_phrase_list[numpy.newaxis],
-                style_id=numpy.array(style_id, dtype=numpy.int64).reshape(-1),
+                vowel_phoneme_list=vowel_phoneme_list[np.newaxis],
+                consonant_phoneme_list=consonant_phoneme_list[np.newaxis],
+                start_accent_list=start_accent_list[np.newaxis],
+                end_accent_list=end_accent_list[np.newaxis],
+                start_accent_phrase_list=start_accent_phrase_list[np.newaxis],
+                end_accent_phrase_list=end_accent_phrase_list[np.newaxis],
+                style_id=np.array(style_id, dtype=np.int64).reshape(-1),
             )[0]
         return f0_list
 
     def safe_decode_forward(
-        self, phoneme: ndarray, f0: ndarray, style_id: StyleId
-    ) -> tuple[ndarray, int]:
+        self,
+        phoneme: NDArray[np.float32],
+        f0: NDArray[np.float32],
+        style_id: StyleId,
+    ) -> tuple[NDArray[np.float32], int]:
         # 「指定スタイルを初期化」「mutexによる安全性」「系列長・データ型に関するアダプター」を提供する
         self.initialize_style_id_synthesis(style_id, skip_reinit=True)
         with self.mutex:
             wave = self.core.decode_forward(
                 length=phoneme.shape[0],
                 phoneme_size=phoneme.shape[1],
-                f0=f0[:, numpy.newaxis],
+                f0=f0[:, np.newaxis],
                 phoneme=phoneme,
-                style_id=numpy.array(style_id, dtype=numpy.int64).reshape(-1),
+                style_id=np.array(style_id, dtype=np.int64).reshape(-1),
             )
         sr_wave = self.default_sampling_rate
         return wave, sr_wave
diff --git a/voicevox_engine/core_wrapper.py b/voicevox_engine/core_wrapper.py
index 6ba4eeadc..83fa5f417 100644
--- a/voicevox_engine/core_wrapper.py
+++ b/voicevox_engine/core_wrapper.py
@@ -8,6 +8,7 @@
 from typing import Literal
 
 import numpy as np
+from numpy.typing import NDArray
 
 
 class OldCoreError(Exception):
@@ -525,21 +526,24 @@ def metas(self) -> str:
         return self.core.metas().decode("utf-8")
 
     def yukarin_s_forward(
-        self, length: int, phoneme_list: np.ndarray, style_id: np.ndarray
-    ) -> np.ndarray:
+        self,
+        length: int,
+        phoneme_list: NDArray[np.int64],
+        style_id: NDArray[np.int64],
+    ) -> NDArray[np.float32]:
         """
         音素列から、音素ごとの長さを求める関数
         Parameters
         ----------
         length : int
             音素列の長さ
-        phoneme_list : np.ndarray
+        phoneme_list : NDArray[np.int64]
             音素列
-        style_id : np.ndarray
+        style_id : NDArray[np.int64]
             スタイル番号
         Returns
         -------
-        output : np.ndarray
+        output : NDArray[np.float32]
             音素ごとの長さ
         """
         output = np.zeros((length,), dtype=np.float32)
@@ -556,37 +560,37 @@ def yukarin_s_forward(
     def yukarin_sa_forward(
         self,
         length: int,
-        vowel_phoneme_list: np.ndarray,
-        consonant_phoneme_list: np.ndarray,
-        start_accent_list: np.ndarray,
-        end_accent_list: np.ndarray,
-        start_accent_phrase_list: np.ndarray,
-        end_accent_phrase_list: np.ndarray,
-        style_id: np.ndarray,
-    ) -> np.ndarray:
+        vowel_phoneme_list: NDArray[np.int64],
+        consonant_phoneme_list: NDArray[np.int64],
+        start_accent_list: NDArray[np.int64],
+        end_accent_list: NDArray[np.int64],
+        start_accent_phrase_list: NDArray[np.int64],
+        end_accent_phrase_list: NDArray[np.int64],
+        style_id: NDArray[np.int64],
+    ) -> NDArray[np.float32]:
         """
         モーラごとの音素列とアクセント情報から、モーラごとの音高を求める関数
         Parameters
         ----------
         length : int
             モーラ列の長さ
-        vowel_phoneme_list : np.ndarray
+        vowel_phoneme_list : NDArray[np.int64]
             母音の音素列
-        consonant_phoneme_list : np.ndarray
+        consonant_phoneme_list : NDArray[np.int64]
             子音の音素列
-        start_accent_list : np.ndarray
+        start_accent_list : NDArray[np.int64]
         アクセントの開始位置
-        end_accent_list : np.ndarray
+        end_accent_list : NDArray[np.int64]
             アクセントの終了位置
-        start_accent_phrase_list : np.ndarray
+        start_accent_phrase_list : NDArray[np.int64]
             アクセント句の開始位置
-        end_accent_phrase_list : np.ndarray
+        end_accent_phrase_list : NDArray[np.int64]
             アクセント句の終了位置
-        style_id : np.ndarray
+        style_id : NDArray[np.int64]
             スタイル番号
         Returns
         -------
-        output : np.ndarray
+        output : NDArray[np.float32]
             モーラごとの音高
         """
         output = np.empty(
@@ -615,10 +619,10 @@ def decode_forward(
         self,
         length: int,
         phoneme_size: int,
-        f0: np.ndarray,
-        phoneme: np.ndarray,
-        style_id: np.ndarray,
-    ) -> np.ndarray:
+        f0: NDArray[np.float32],
+        phoneme: NDArray[np.float32],
+        style_id: NDArray[np.int64],
+    ) -> NDArray[np.float32]:
         """
         フレームごとの音素と音高から波形を求める関数
         Parameters
@@ -627,15 +631,15 @@ def decode_forward(
             フレームの長さ
         phoneme_size : int
             音素の種類数
-        f0 : np.ndarray
+        f0 : NDArray[np.float32]
             フレームごとの音高
-        phoneme : np.ndarray
+        phoneme : NDArray[np.float32]
             フレームごとの音素
-        style_id : np.ndarray
+        style_id : NDArray[np.int64]
             スタイル番号
         Returns
         -------
-        output : np.ndarray
+        output : NDArray[np.float32]
             音声波形
         """
 
diff --git a/voicevox_engine/dev/core/mock.py b/voicevox_engine/dev/core/mock.py
index 31918e0f3..51b06db28 100644
--- a/voicevox_engine/dev/core/mock.py
+++ b/voicevox_engine/dev/core/mock.py
@@ -1,8 +1,9 @@
 import json
 from pathlib import Path
 
-import numpy
+import numpy as np
 from numpy import ndarray
+from numpy.typing import NDArray
 
 from ...core_wrapper import CoreWrapper
 
@@ -65,13 +66,13 @@ def metas(self) -> str:
 
     def yukarin_s_forward(
         self, length: int, phoneme_list: ndarray, style_id: ndarray
-    ) -> ndarray:
+    ) -> NDArray[np.floating]:
         """音素系列サイズ・音素ID系列・スタイルIDから音素長系列を生成する"""
         result = []
         # mockとしての適当な処理、特に意味はない
         for i in range(length):
             result.append(round((phoneme_list[i] * 0.0625 + style_id).item(), 2))
-        return numpy.array(result)
+        return np.array(result)
 
     def yukarin_sa_forward(
         self,
@@ -83,7 +84,7 @@ def yukarin_sa_forward(
         start_accent_phrase_list: ndarray,
         end_accent_phrase_list: ndarray,
         style_id: ndarray,
-    ) -> ndarray:
+    ) -> NDArray[np.floating]:
         """モーラ系列サイズ・母音系列・子音系列・アクセント位置・アクセント句区切り・スタイルIDからモーラ音高系列を生成する"""
         assert length > 1, "前後無音を必ず付与しなければならない"
 
@@ -107,7 +108,7 @@ def yukarin_sa_forward(
                     2,
                 )
             )
-        return numpy.array(result)[numpy.newaxis]
+        return np.array(result)[np.newaxis]
 
     def decode_forward(
         self,
@@ -116,15 +117,15 @@ def decode_forward(
         f0: ndarray,
         phoneme: ndarray,
         style_id: ndarray,
-    ) -> ndarray:
+    ) -> NDArray[np.floating]:
         """フレーム長・音素種類数・フレーム音高・フレーム音素onehot・スタイルIDからダミー音声波形を生成する"""
         # 入力値を反映し、長さが 256 倍であるダミー配列を出力する
         result: list[ndarray] = []
         for i in range(length):
             result += [
-                (f0[i, 0] * (numpy.where(phoneme[i] == 1)[0] / phoneme_size) + style_id)
+                (f0[i, 0] * (np.where(phoneme[i] == 1)[0] / phoneme_size) + style_id)
             ] * 256
-        return numpy.array(result)
+        return np.array(result)
 
     def supported_devices(self):
         return json.dumps(
diff --git a/voicevox_engine/dev/tts_engine/mock.py b/voicevox_engine/dev/tts_engine/mock.py
index c91e06aa6..e85c44aa5 100644
--- a/voicevox_engine/dev/tts_engine/mock.py
+++ b/voicevox_engine/dev/tts_engine/mock.py
@@ -1,8 +1,9 @@
 import copy
 from logging import getLogger
-from typing import Any, Dict
+from typing import Any
 
 import numpy as np
+from numpy.typing import NDArray
 from pyopenjtalk import tts
 from soxr import resample
 
@@ -24,7 +25,7 @@ def synthesize_wave(
         query: AudioQuery,
         style_id: StyleId,
         enable_interrogative_upspeak: bool = True,
-    ) -> np.ndarray:
+    ) -> NDArray[np.float32]:
         """音声合成用のクエリに含まれる読み仮名に基づいてOpenJTalkで音声波形を生成する"""
         # モーフィング時などに同一参照のqueryで複数回呼ばれる可能性があるので、元の引数のqueryに破壊的変更を行わない
         query = copy.deepcopy(query)
@@ -38,9 +39,9 @@ def synthesize_wave(
         # volume
         wave *= query.volumeScale
 
-        return wave.astype("int16")
+        return wave
 
-    def forward(self, text: str, **kwargs: Dict[str, Any]) -> np.ndarray:
+    def forward(self, text: str, **kwargs: dict[str, Any]) -> NDArray[np.float32]:
         """
         forward tts via pyopenjtalk.tts()
         参照→TTSEngine のdocstring [Mock]
@@ -52,7 +53,7 @@ def forward(self, text: str, **kwargs: Dict[str, Any]) -> np.ndarray:
 
         Returns
         -------
-        wave [npt.NDArray[np.int16]]
+        wave [NDArray[np.float32]]
             音声波形データをNumPy配列で返します
 
         Note
@@ -63,10 +64,11 @@ def forward(self, text: str, **kwargs: Dict[str, Any]) -> np.ndarray:
         dtype=np.float64, 16 bit, mono 48000 Hz
 
         # resampleの説明
-        非モック実装（decode_forward）と合わせるために、出力を24kHzに変換した。
+        非モック実装（decode_forward）と合わせるために、出力を24kHz、32bit浮動小数に変換した。
         """
         logger = getLogger("uvicorn")  # FastAPI / Uvicorn 内からの利用のため
         logger.info("[Mock] input text: %s" % text)
         wave, sr = tts(text)
+        wave /= 2**15
         wave = resample(wave, 48000, 24000)
-        return wave
+        return wave.astype(np.float32)
diff --git a/voicevox_engine/morphing.py b/voicevox_engine/morphing.py
index f94fbb061..e5d7cbd7e 100644
--- a/voicevox_engine/morphing.py
+++ b/voicevox_engine/morphing.py
@@ -1,10 +1,10 @@
 from copy import deepcopy
 from dataclasses import dataclass
 from itertools import chain
-from typing import Dict, List, Tuple
 
 import numpy as np
 import pyworld as pw
+from numpy.typing import NDArray
 from soxr import resample
 
 from .core_adapter import CoreAdapter
@@ -19,20 +19,19 @@
 from .tts_pipeline import TTSEngine
 
 
-# FIXME: ndarray type hint, https://github.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder/blob/2b64f86197573497c685c785c6e0e743f407b63e/pyworld/pyworld.pyx#L398  # noqa
 @dataclass(frozen=True)
 class MorphingParameter:
     fs: int
     frame_period: float
-    base_f0: np.ndarray
-    base_aperiodicity: np.ndarray
-    base_spectrogram: np.ndarray
-    target_spectrogram: np.ndarray
+    base_f0: NDArray[np.double]
+    base_aperiodicity: NDArray[np.double]
+    base_spectrogram: NDArray[np.double]
+    target_spectrogram: NDArray[np.double]
 
 
 def create_morphing_parameter(
-    base_wave: np.ndarray,
-    target_wave: np.ndarray,
+    base_wave: NDArray[np.double],
+    target_wave: NDArray[np.double],
     fs: int,
 ) -> MorphingParameter:
     frame_period = 1.0
@@ -55,9 +54,9 @@ def create_morphing_parameter(
 
 
 def get_morphable_targets(
-    speakers: List[Speaker],
-    base_style_ids: List[StyleId],
-) -> List[Dict[StyleId, MorphableTargetInfo]]:
+    speakers: list[Speaker],
+    base_style_ids: list[StyleId],
+) -> list[dict[StyleId, MorphableTargetInfo]]:
     """
     speakers: 全話者の情報
     base_speakers: モーフィング可能か判定したいベースのスタイルIDリスト
@@ -81,7 +80,7 @@ def get_morphable_targets(
 
 
 def is_synthesis_morphing_permitted(
-    speaker_lookup: Dict[StyleId, Tuple[Speaker, SpeakerStyle]],
+    speaker_lookup: dict[StyleId, tuple[Speaker, SpeakerStyle]],
     base_style_id: StyleId,
     target_style_id: StyleId,
 ) -> bool:
@@ -163,7 +162,7 @@ def synthesis_morphing(
     morph_rate: float,
     output_fs: int,
     output_stereo: bool = False,
-) -> np.ndarray:
+) -> NDArray[np.float64]:
     """
     指定した割合で、パラメータをもとにモーフィングした音声を生成します。
 
@@ -178,7 +177,7 @@ def synthesis_morphing(
 
     Returns
     -------
-    generated : np.ndarray
+    generated : NDArray[np.float64]
         モーフィングした音声
 
     Raises
diff --git a/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py b/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
index a3e774ec4..6c861e16a 100644
--- a/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
+++ b/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
@@ -1,6 +1,7 @@
 from typing import Literal
 
-import numpy
+import numpy as np
+from numpy.typing import NDArray
 
 # NOTE: `Vowel` は母音 (a/i/u/e/o の有声・無声) + 無音 pau + 撥音 N ("ん") + 促音 cl ("っ")
 # NOTE: 型の名称は暫定的
@@ -121,8 +122,8 @@ def phoneme_id(self) -> int:
         return self._PHONEME_LIST.index(self.phoneme)
 
     @property
-    def onehot(self):
+    def onehot(self) -> NDArray[np.float32]:
         """音素onehotベクトルを取得する"""
-        vec = numpy.zeros(self._NUM_PHONEME, dtype=numpy.float32)
+        vec = np.zeros(self._NUM_PHONEME, dtype=np.float32)
         vec[self.phoneme_id] = 1.0
         return vec
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index 1e12eddd6..69692ec40 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -1,8 +1,8 @@
 import copy
 import math
 
-import numpy
-from numpy import ndarray
+import numpy as np
+from numpy.typing import NDArray
 from soxr import resample
 
 from ..core_adapter import CoreAdapter
@@ -131,7 +131,9 @@ def apply_speed_scale(moras: list[Mora], query: AudioQuery) -> list[Mora]:
     return moras
 
 
-def count_frame_per_unit(moras: list[Mora]) -> tuple[ndarray, ndarray]:
+def count_frame_per_unit(
+    moras: list[Mora],
+) -> tuple[NDArray[np.integer], NDArray[np.integer]]:
     """
     音素あたり・モーラあたりのフレーム長を算出する
     Parameters
@@ -140,9 +142,9 @@ def count_frame_per_unit(moras: list[Mora]) -> tuple[ndarray, ndarray]:
         モーラ系列
     Returns
     -------
-    frame_per_phoneme : ndarray
+    frame_per_phoneme : NDArray[np.integer]
         音素あたりのフレーム長。端数丸め。shape = (Phoneme,)
-    frame_per_mora : ndarray
+    frame_per_mora : NDArray[np.integer]
         モーラあたりのフレーム長。端数丸め。shape = (Mora,)
     """
     frame_per_phoneme: list[int] = []
@@ -159,13 +161,13 @@ def count_frame_per_unit(moras: list[Mora]) -> tuple[ndarray, ndarray]:
         frame_per_phoneme += [vowel_frames]
         frame_per_mora += [mora_frames]
 
-    return numpy.array(frame_per_phoneme), numpy.array(frame_per_mora)
+    return np.array(frame_per_phoneme), np.array(frame_per_mora)
 
 
 def _to_frame(sec: float) -> int:
     FRAMERATE = 93.75  # 24000 / 256 [frame/sec]
     # NOTE: `round` は偶数丸め。移植時に取扱い注意。詳細は voicevox_engine#552
-    return numpy.round(sec * FRAMERATE).astype(numpy.int32).item()
+    return np.round(sec * FRAMERATE).astype(np.int32).item()
 
 
 def apply_pitch_scale(moras: list[Mora], query: AudioQuery) -> list[Mora]:
@@ -179,22 +181,21 @@ def apply_intonation_scale(moras: list[Mora], query: AudioQuery) -> list[Mora]:
     """モーラ系列へ音声合成用のクエリがもつ抑揚スケール（`intonationScale`）を適用する"""
     # 有声音素 (f0>0) の平均値に対する乖離度をスケール
     voiced = list(filter(lambda mora: mora.pitch > 0, moras))
-    mean_f0 = numpy.mean(list(map(lambda mora: mora.pitch, voiced))).item()
+    mean_f0 = np.mean(list(map(lambda mora: mora.pitch, voiced))).item()
     if mean_f0 != math.nan:  # 空リスト -> NaN
         for mora in voiced:
             mora.pitch = (mora.pitch - mean_f0) * query.intonationScale + mean_f0
     return moras
 
 
-def apply_volume_scale(wave: numpy.ndarray, query: AudioQuery) -> numpy.ndarray:
+def apply_volume_scale(wave: np.ndarray, query: AudioQuery) -> NDArray[np.floating]:
     """音声波形へ音声合成用のクエリがもつ音量スケール（`volumeScale`）を適用する"""
-    wave *= query.volumeScale
-    return wave
+    return wave * query.volumeScale
 
 
 def apply_output_sampling_rate(
-    wave: ndarray, sr_wave: int, query: AudioQuery
-) -> ndarray:
+    wave: NDArray[np.floating], sr_wave: float, query: AudioQuery
+) -> NDArray[np.floating]:
     """音声波形へ音声合成用のクエリがもつ出力サンプリングレート（`outputSamplingRate`）を適用する"""
     # サンプリングレート一致のときはスルー
     if sr_wave == query.outputSamplingRate:
@@ -203,14 +204,18 @@ def apply_output_sampling_rate(
     return wave
 
 
-def apply_output_stereo(wave: ndarray, query: AudioQuery) -> ndarray:
+def apply_output_stereo(
+    wave: NDArray[np.floating], query: AudioQuery
+) -> NDArray[np.floating]:
     """音声波形へ音声合成用のクエリがもつステレオ出力設定（`outputStereo`）を適用する"""
     if query.outputStereo:
-        wave = numpy.array([wave, wave]).T
+        wave = np.array([wave, wave]).T
     return wave
 
 
-def query_to_decoder_feature(query: AudioQuery) -> tuple[ndarray, ndarray]:
+def query_to_decoder_feature(
+    query: AudioQuery,
+) -> tuple[NDArray[np.float32], NDArray[np.float32]]:
     """音声合成用のクエリからフレームごとの音素 (shape=(フレーム長, 音素数)) と音高 (shape=(フレーム長,)) を得る"""
     moras = to_flatten_moras(query.accent_phrases)
 
@@ -221,18 +226,20 @@ def query_to_decoder_feature(query: AudioQuery) -> tuple[ndarray, ndarray]:
     moras = apply_intonation_scale(moras, query)
 
     # 表現を変更する（音素クラス → 音素 onehot ベクトル、モーラクラス → 音高スカラ）
-    phoneme = numpy.stack([p.onehot for p in to_flatten_phonemes(moras)])
-    f0 = numpy.array([mora.pitch for mora in moras], dtype=numpy.float32)
+    phoneme = np.stack([p.onehot for p in to_flatten_phonemes(moras)])
+    f0 = np.array([mora.pitch for mora in moras], dtype=np.float32)
 
     # 時間スケールを変更する（音素・モーラ → フレーム）
     frame_per_phoneme, frame_per_mora = count_frame_per_unit(moras)
-    phoneme = numpy.repeat(phoneme, frame_per_phoneme, axis=0)
-    f0 = numpy.repeat(f0, frame_per_mora)
+    phoneme = np.repeat(phoneme, frame_per_phoneme, axis=0)
+    f0 = np.repeat(f0, frame_per_mora)
 
     return phoneme, f0
 
 
-def raw_wave_to_output_wave(query: AudioQuery, wave: ndarray, sr_wave: int) -> ndarray:
+def raw_wave_to_output_wave(
+    query: AudioQuery, wave: np.ndarray, sr_wave: int
+) -> NDArray[np.floating]:
     """生音声波形に音声合成用のクエリを適用して出力音声波形を生成する"""
     wave = apply_volume_scale(wave, query)
     wave = apply_output_sampling_rate(wave, sr_wave, query)
@@ -260,7 +267,7 @@ def update_length(
         phonemes = [Phoneme("pau")] + phonemes + [Phoneme("pau")]
 
         # 音素クラスから音素IDスカラへ表現を変換する
-        phoneme_ids = numpy.array([p.phoneme_id for p in phonemes], dtype=numpy.int64)
+        phoneme_ids = np.array([p.phoneme_id for p in phonemes], dtype=np.int64)
 
         # コアを用いて音素長を生成する
         phoneme_lengths = self._core.safe_yukarin_s_forward(phoneme_ids, style_id)
@@ -287,9 +294,11 @@ def update_pitch(
             return []
 
         # accent
-        def _create_one_hot(accent_phrase: AccentPhrase, position: int) -> ndarray:
+        def _create_one_hot(
+            accent_phrase: AccentPhrase, position: int
+        ) -> NDArray[np.floating]:
             """
-            単位行列(numpy.eye)を応用し、accent_phrase内でone hotな配列(リスト)を作る
+            単位行列(np.eye)を応用し、accent_phrase内でone hotな配列(リスト)を作る
             例えば、accent_phraseのmorasの長さが12、positionが1なら
             [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
             morasの長さが同じく12、positionが-1なら
@@ -304,16 +313,16 @@ def _create_one_hot(accent_phrase: AccentPhrase, position: int) -> ndarray:
                 one hotにするindex
             Returns
             -------
-            one_hot : numpy.ndarray
+            one_hot : NDArray[np.floating]
                 one hotな配列(リスト)
             """
-            return numpy.r_[
-                numpy.eye(len(accent_phrase.moras))[position],
+            return np.r_[
+                np.eye(len(accent_phrase.moras))[position],
                 (0 if accent_phrase.pause_mora is not None else []),
             ]
 
         # アクセントの開始/終了位置リストを作る
-        start_accent_list = numpy.concatenate(
+        start_accent_list = np.concatenate(
             [
                 # accentはプログラミング言語におけるindexのように0始まりではなく1始まりなので、
                 # accentが1の場合は0番目を指定している
@@ -322,7 +331,7 @@ def _create_one_hot(accent_phrase: AccentPhrase, position: int) -> ndarray:
                 for accent_phrase in accent_phrases
             ]
         )
-        end_accent_list = numpy.concatenate(
+        end_accent_list = np.concatenate(
             [
                 # accentはプログラミング言語におけるindexのように0始まりではなく1始まりなので、1を引いている
                 _create_one_hot(accent_phrase, accent_phrase.accent - 1)
@@ -331,35 +340,33 @@ def _create_one_hot(accent_phrase: AccentPhrase, position: int) -> ndarray:
         )
 
         # アクセント句の開始/終了位置リストを作る
-        start_accent_phrase_list = numpy.concatenate(
+        start_accent_phrase_list = np.concatenate(
             [_create_one_hot(accent_phrase, 0) for accent_phrase in accent_phrases]
         )
-        end_accent_phrase_list = numpy.concatenate(
+        end_accent_phrase_list = np.concatenate(
             [_create_one_hot(accent_phrase, -1) for accent_phrase in accent_phrases]
         )
 
         # 前後無音を付加する
-        start_accent_list = numpy.r_[0, start_accent_list, 0]
-        end_accent_list = numpy.r_[0, end_accent_list, 0]
-        start_accent_phrase_list = numpy.r_[0, start_accent_phrase_list, 0]
-        end_accent_phrase_list = numpy.r_[0, end_accent_phrase_list, 0]
+        start_accent_list = np.r_[0, start_accent_list, 0]
+        end_accent_list = np.r_[0, end_accent_list, 0]
+        start_accent_phrase_list = np.r_[0, start_accent_phrase_list, 0]
+        end_accent_phrase_list = np.r_[0, end_accent_phrase_list, 0]
 
         # キャスト
-        start_accent_list = numpy.array(start_accent_list, dtype=numpy.int64)
-        end_accent_list = numpy.array(end_accent_list, dtype=numpy.int64)
-        start_accent_phrase_list = numpy.array(
-            start_accent_phrase_list, dtype=numpy.int64
-        )
-        end_accent_phrase_list = numpy.array(end_accent_phrase_list, dtype=numpy.int64)
+        start_accent_list = np.array(start_accent_list, dtype=np.int64)
+        end_accent_list = np.array(end_accent_list, dtype=np.int64)
+        start_accent_phrase_list = np.array(start_accent_phrase_list, dtype=np.int64)
+        end_accent_phrase_list = np.array(end_accent_phrase_list, dtype=np.int64)
 
         # アクセント句系列から（前後の無音含まない）モーラ系列と（前後の無音含む）音素系列を抽出する
         moras, phonemes = pre_process(accent_phrases)
 
         # 前後無音付加済みの音素系列から子音ID系列・母音ID系列を抽出する
         consonants, vowels = split_mora(phonemes)
-        vowel_ids = numpy.array([p.phoneme_id for p in vowels], dtype=numpy.int64)
-        consonant_ids = numpy.array(
-            [p.phoneme_id if p else -1 for p in consonants], dtype=numpy.int64
+        vowel_ids = np.array([p.phoneme_id for p in vowels], dtype=np.int64)
+        consonant_ids = np.array(
+            [p.phoneme_id if p else -1 for p in consonants], dtype=np.int64
         )
 
         # コアを用いてモーラ音高を生成する
@@ -411,7 +418,7 @@ def synthesize_wave(
         query: AudioQuery,
         style_id: StyleId,
         enable_interrogative_upspeak: bool = True,
-    ) -> ndarray:
+    ) -> NDArray[np.floating]:
         """音声合成用のクエリ・スタイルID・疑問文語尾自動調整フラグに基づいて音声波形を生成する"""
         # モーフィング時などに同一参照のqueryで複数回呼ばれる可能性があるので、元の引数のqueryに破壊的変更を行わない
         query = copy.deepcopy(query)
diff --git a/voicevox_engine/utility/connect_base64_waves.py b/voicevox_engine/utility/connect_base64_waves.py
index 900211e3c..287e14078 100644
--- a/voicevox_engine/utility/connect_base64_waves.py
+++ b/voicevox_engine/utility/connect_base64_waves.py
@@ -1,9 +1,9 @@
 import base64
 import io
-from typing import List, Tuple
 
 import numpy as np
 import soundfile
+from numpy.typing import NDArray
 from soxr import resample
 
 
@@ -12,7 +12,7 @@ def __init__(self, message: str):
         self.message = message
 
 
-def decode_base64_waves(waves: List[str]) -> List[Tuple[np.ndarray, int]]:
+def decode_base64_waves(waves: list[str]) -> list[tuple[NDArray[np.float64], int]]:
     """
     base64エンコードされた複数のwavデータをデコードする
     Parameters
@@ -21,7 +21,7 @@ def decode_base64_waves(waves: List[str]) -> List[Tuple[np.ndarray, int]]:
         base64エンコードされたwavデータのリスト
     Returns
     -------
-    waves_nparray_sr: List[Tuple[np.ndarray, int]]
+    waves_nparray_sr: list[tuple[NDArray[np.float64], int]]
         (NumPy配列の音声波形データ, サンプリングレート) 形式のタプルのリスト
     """
     if len(waves) == 0:
@@ -42,7 +42,7 @@ def decode_base64_waves(waves: List[str]) -> List[Tuple[np.ndarray, int]]:
     return waves_nparray_sr
 
 
-def connect_base64_waves(waves: List[str]) -> Tuple[np.ndarray, int]:
+def connect_base64_waves(waves: list[str]) -> tuple[NDArray[np.float64], int]:
     waves_nparray_sr = decode_base64_waves(waves)
 
     max_sampling_rate = max([sr for _, sr in waves_nparray_sr])

From 0503bd3b76086b687222d55827f88ac438c6f1df Mon Sep 17 00:00:00 2001
From: sabonerune <102559104+sabonerune@users.noreply.github.com>
Date: Mon, 8 Jan 2024 19:06:37 +0900
Subject: [PATCH 122/177] =?UTF-8?q?FIX:=20morphable=5Ftargets=E3=82=A8?=
 =?UTF-8?q?=E3=83=B3=E3=83=89=E3=83=9D=E3=82=A4=E3=83=B3=E3=83=88=E3=81=AE?=
 =?UTF-8?q?API=E3=81=8C=E5=A4=89=E3=82=8F=E3=81=A3=E3=81=A6=E3=81=97?=
 =?UTF-8?q?=E3=81=BE=E3=81=A3=E3=81=A6=E3=81=84=E3=81=9F=E3=81=AE=E3=81=A7?=
 =?UTF-8?q?=E4=BF=AE=E6=AD=A3=20(#991)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 run.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/run.py b/run.py
index b4d851738..a483b6b1c 100644
--- a/run.py
+++ b/run.py
@@ -587,9 +587,7 @@ def multi_synthesis(
         summary="指定したスタイルに対してエンジン内の話者がモーフィングが可能か判定する",
     )
     def morphable_targets(
-        base_style_ids: list[StyleId] | None = Query(default=None),  # noqa: B008
-        base_speakers: list[StyleId] | None = Query(default=None),  # noqa: B008
-        core_version: str | None = None,
+        base_style_ids: list[StyleId], core_version: str | None = None
     ) -> list[dict[str, MorphableTargetInfo]]:
         """
         指定されたベーススタイルに対してエンジン内の各話者がモーフィング機能を利用可能か返します。
@@ -597,9 +595,6 @@ def morphable_targets(
         プロパティが存在しない場合は、モーフィングが許可されているとみなします。
         返り値の話者はstring型なので注意。
         """
-        base_style_ids = get_style_id_from_deprecated(
-            style_id=base_style_ids, deprecated_speaker=base_speakers
-        )
         core = get_core(core_version)
 
         try:

From 896bc3e92da3ccd61caa7a1cb01b52504f50bbe9 Mon Sep 17 00:00:00 2001
From: tomoish <103555868+tomoish@users.noreply.github.com>
Date: Tue, 9 Jan 2024 01:41:44 +0900
Subject: [PATCH 123/177] =?UTF-8?q?cors=5Fpolicy=5Fmode,=20allow=5Forigin?=
 =?UTF-8?q?=E3=81=AE=E5=84=AA=E5=85=88=E5=BA=A6=E3=81=AB=E3=81=A4=E3=81=84?=
 =?UTF-8?q?=E3=81=A6help=E3=82=AA=E3=83=97=E3=82=B7=E3=83=A7=E3=83=B3?=
 =?UTF-8?q?=E3=81=AB=E8=BF=BD=E5=8A=A0=20(#985)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Update help option

* Update help option in README

* Fix README.md
---
 README.md | 2 ++
 run.py    | 8 +++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index f30df9b4a..6b9fb7cbb 100644
--- a/README.md
+++ b/README.md
@@ -363,8 +363,10 @@ options:
   --output_log_utf8     指定するとログ出力をUTF-8でおこないます。指定しないと、代わりに環境変数 VV_OUTPUT_LOG_UTF8 の値が使われます。VV_OUTPUT_LOG_UTF8 の値が1の場合はUTF-8で、0または空文字、値がない場合は環境によって自動的に決定されます。
   --cors_policy_mode {CorsPolicyMode.all,CorsPolicyMode.localapps}
                         CORSの許可モード。allまたはlocalappsが指定できます。allはすべてを許可します。localappsはオリジン間リソース共有ポリシーを、app://.とlocalhost関連に限定します。その他のオリジンはallow_originオプションで追加できます。デフォルトはlocalapps。
+                        このオプションは--setting_fileで指定される設定ファイルよりも優先されます。
   --allow_origin [ALLOW_ORIGIN ...]
                         許可するオリジンを指定します。スペースで区切ることで複数指定できます。
+                        このオプションは--setting_fileで指定される設定ファイルよりも優先されます。
   --setting_file SETTING_FILE
                         設定ファイルを指定できます。
   --preset_file PRESET_FILE
diff --git a/run.py b/run.py
index a483b6b1c..91b6ae068 100644
--- a/run.py
+++ b/run.py
@@ -1464,11 +1464,17 @@ def main() -> None:
             "CORSの許可モード。allまたはlocalappsが指定できます。allはすべてを許可します。"
             "localappsはオリジン間リソース共有ポリシーを、app://.とlocalhost関連に限定します。"
             "その他のオリジンはallow_originオプションで追加できます。デフォルトはlocalapps。"
+            "このオプションは--setting_fileで指定される設定ファイルよりも優先されます。"
         ),
     )
 
     parser.add_argument(
-        "--allow_origin", nargs="*", help="許可するオリジンを指定します。スペースで区切ることで複数指定できます。"
+        "--allow_origin",
+        nargs="*",
+        help=(
+            "許可するオリジンを指定します。スペースで区切ることで複数指定できます。"
+            "このオプションは--setting_fileで指定される設定ファイルよりも優先されます。"
+        ),
     )
 
     parser.add_argument(

From e9d5f0a31bc8e8817ccbb50591e61c3a2ca10a7c Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Tue, 9 Jan 2024 02:04:31 +0900
Subject: [PATCH 124/177] =?UTF-8?q?=E8=A8=AD=E5=AE=9A=E3=83=9A=E3=83=BC?=
 =?UTF-8?q?=E3=82=B8=E3=82=92Vue=E3=81=AB=E3=80=81`POST=20/setting`?=
 =?UTF-8?q?=E3=81=A7html=E3=81=8C=E8=BF=94=E3=82=89=E3=81=AA=E3=81=84?=
 =?UTF-8?q?=E3=82=88=E3=81=86=E3=81=AB=E5=A4=89=E6=9B=B4=E3=80=81=E3=83=87?=
 =?UTF-8?q?=E3=82=B6=E3=82=A4=E3=83=B3=E8=AA=BF=E6=95=B4=20(#885)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* コミット

* stash

* 一通りの機能を実装

* 全部CDNに

* promise化

* 微調整

* デザインの調整

* 設定を更新します。

* 読み込み中です。表示には数秒かかることがあります。

* display: none

* mypy

* フェードインにする
---
 run.py              |  34 ++-
 ui_template/ui.html | 601 +++++++++++++++++++++++++-------------------
 2 files changed, 365 insertions(+), 270 deletions(-)

diff --git a/run.py b/run.py
index 91b6ae068..97123ad60 100644
--- a/run.py
+++ b/run.py
@@ -243,7 +243,11 @@ def check_disabled_mutable_api():
 
     metas_store = MetasStore(root_dir / "speaker_info")
 
-    setting_ui_template = Jinja2Templates(directory=internal_root() / "ui_template")
+    setting_ui_template = Jinja2Templates(
+        directory=internal_root() / "ui_template",
+        variable_start_string="<JINJA_PRE>",
+        variable_end_string="<JINJA_POST>",
+    )
 
     # キャッシュを有効化
     # モジュール側でlru_cacheを指定するとキャッシュを制御しにくいため、HTTPサーバ側で指定する
@@ -1298,8 +1302,12 @@ def validate_kana(text: str) -> bool:
 
     @app.get("/setting", response_class=Response, tags=["設定"])
     def setting_get(request: Request) -> Response:
+        """
+        設定ページを返します。
+        """
         settings = setting_loader.load_setting_file()
 
+        brand_name = engine_manifest_data.brand_name
         cors_policy_mode = settings.cors_policy_mode
         allow_origin = settings.allow_origin
 
@@ -1310,6 +1318,7 @@ def setting_get(request: Request) -> Response:
             "ui.html",
             {
                 "request": request,
+                "brand_name": brand_name,
                 "cors_policy_mode": cors_policy_mode,
                 "allow_origin": allow_origin,
             },
@@ -1322,10 +1331,12 @@ def setting_get(request: Request) -> Response:
         dependencies=[Depends(check_disabled_mutable_api)],
     )
     def setting_post(
-        request: Request,
-        cors_policy_mode: str | None = Form(None),  # noqa: B008
-        allow_origin: str | None = Form(None),  # noqa: B008
+        cors_policy_mode: CorsPolicyMode = Form(),  # noqa
+        allow_origin: str | None = Form(default=None),  # noqa
     ) -> Response:
+        """
+        設定を更新します。
+        """
         settings = Setting(
             cors_policy_mode=cors_policy_mode,
             allow_origin=allow_origin,
@@ -1334,20 +1345,7 @@ def setting_post(
         # 更新した設定へ上書き
         setting_loader.dump_setting_file(settings)
 
-        message = "設定を保存しました。"
-
-        if allow_origin is None:
-            allow_origin = ""
-
-        return setting_ui_template.TemplateResponse(
-            "ui.html",
-            {
-                "request": request,
-                "cors_policy_mode": cors_policy_mode,
-                "allow_origin": allow_origin,
-                "message": message,
-            },
-        )
+        return Response(status_code=204)
 
     # BaseLibraryInfo/VvlibManifestモデルはAPIとして表には出ないが、エディタ側で利用したいので、手動で追加する
     # ref: https://fastapi.tiangolo.com/advanced/extending-openapi/#modify-the-openapi-schema
diff --git a/ui_template/ui.html b/ui_template/ui.html
index 3a156f3f3..b65526e4c 100644
--- a/ui_template/ui.html
+++ b/ui_template/ui.html
@@ -1,271 +1,368 @@
-<!doctype html>
+<!DOCTYPE html>
+
+<!-- 
+  VOICEVOXエンジンの設定ページです。
+  VueとBootstrapを使っています。
+  ライブラリを読み込んだあと、Vueコンポーネントの初期化が完了してからUIを表示します。
+-->
+
 <html lang="ja">
-    <head>
-        <meta charset="utf-8" />
-        <title>VOICEVOX Engine 設定</title>
-        <link
-            rel="shortcut icon"
-            href="https://voicevox.hiroshiba.jp/favicon-32x32.png"
-        />
+  <head>
+    <meta charset="utf-8" />
+    <title>VOICEVOX Engine 設定</title>
+    <link
+      rel="shortcut icon"
+      href="https://voicevox.hiroshiba.jp/favicon-32x32.png"
+    />
+
+    <style>
+      .before-init-fadein {
+        animation: fadein 0.5s;
+      }
+
+      /* 指定時間の最後に現れるフェードイン */
+      @keyframes fadein {
+        0% {
+          opacity: 0;
+        }
+        95% {
+          opacity: 0;
+        }
+        100% {
+          opacity: 1;
+        }
+      }
+    </style>
+  </head>
+
+  <body>
+    <!-- Vueの準備が完了した後にdisplay: noneにする -->
+    <div id="before-init" style="display: block" class="before-init-fadein">
+      <p>読み込み中です。表示には数秒かかることがあります。</p>
+    </div>
 
-        <link
-            href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css"
-            rel="stylesheet"
-            integrity="sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC"
-            crossorigin="anonymous"
+    <!-- Vueの準備が完了した後にdisplay: blockにする -->
+    <div id="app" class="container p-3" style="display: none">
+      <h1 class="mb-3">{{brandName}} エンジン 設定</h1>
+
+      <div class="alert alert-warning" role="alert">
+        変更を反映するにはエンジンの再起動が必要です。
+      </div>
+
+      <div class="mb-3">
+        <label class="form-label">CORS Policy Mode</label>
+        <select
+          class="form-select"
+          aria-label="corsPolicyMode"
+          v-model="corsPolicyMode"
+        >
+          <option value="localapps">localapps</option>
+          <option value="all">all</option>
+        </select>
+        <div class="form-text">
+          <p class="mb-1">
+            localappsはオリジン間リソース共有ポリシーを、app://.とlocalhost関連に限定します。
+          </p>
+          <p class="mb-1">
+            その他のオリジンはAllow Originオプションで追加できます。
+          </p>
+          <p>allはすべてを許可します。危険性を理解した上でご利用ください。</p>
+        </div>
+      </div>
+
+      <div class="mb-3">
+        <label class="form-label">Allow Origin</label>
+        <input
+          class="form-control"
+          type="text"
+          v-model.trim.lazy="allowOrigin"
         />
-        <script
-            src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/js/bootstrap.bundle.min.js"
-            integrity="sha384-MrcW6ZMFYlzcLA8Nl+NtUVF0sA7MsXsP1UyJoMp4YLEuNSfAP+JcXn/tWtIaxVXM"
-            crossorigin="anonymous"
-        ></script>
-    </head>
-
-    <body>
-        <div class="container p-3">
-            <form method="post" enctype="multipart/form-data">
-                <div class="alert alert-warning" role="alert">
-                    設定の変更の更新にはエンジンの再起動が必要です。
-                </div>
-
-                <div class="mb-3">
-                    <label class="form-label">CORS Policy Mode</label>
-                    <select
-                        class="form-select"
-                        aria-label="cors_policy_mode"
-                        name="cors_policy_mode"
-                    >
-                        <option selected value="{{ cors_policy_mode }}">
-                            現在値: {{ cors_policy_mode }}
-                        </option>
-                        <option value="localapps">localapps</option>
-                        <option value="all">all</option>
-                    </select>
-                    <div class="form-text">
-                        <p class="mb-1">
-                            allまたはlocalappsを指定。allはすべてを許可します。
-                        </p>
-                        <p class="mb-1">
-                            localappsはオリジン間リソース共有ポリシーを、app://.とlocalhost関連に限定します。
-                        </p>
-                        <p>
-                            その他のオリジンはallow_originオプションで追加できます。デフォルトはlocalapps。
-                        </p>
-                    </div>
-                </div>
-
-                <div class="mb-3">
-                    <label class="form-label">Allow Origin</label>
-                    <input
-                        class="form-control"
-                        type="text"
-                        name="allow_origin"
-                        value="{{ allow_origin }}"
-                    />
-                    <div class="form-text">
-                        許可するオリジンを指定します。複数指定する場合は、直後にスペースで区切って追加できます。
-                    </div>
-                </div>
-
-                <button
-                    type="button"
-                    class="btn btn-primary mb-3"
-                    data-bs-toggle="modal"
-                    data-bs-target="#submitModal"
-                >
-                    保存
-                </button>
-
-                <hr />
-
-                <div id="mb-3">
-                    <label class="form-label"
-                        >ユーザー辞書のエクスポート&インポート</label
-                    >
-                    <div class="form-text">辞書のエクスポートをします。</div>
-                    <a
-                        download="VOICEVOXユーザー辞書.json"
-                        class="btn btn-primary mb-3"
-                        href="/user_dict"
-                        onclick="showToastWithMessage('辞書をエクスポートしました。');"
-                        target="_blank"
-                        rel="noopener noreferrer"
-                    >
-                        エクスポート
-                    </a>
-                    <div class="form-text">辞書のインポートをします。</div>
-                    <input
-                        class="m-3 form-control"
-                        type="file"
-                        name="user_dictionary_file"
-                        accept="application/json"
-                        id="userDictFile"
-                    />
-                    <input
-                        class="ms-3 form-check-input"
-                        type="checkbox"
-                        name="allow_override"
-                        value="true"
-                        id="allowOverride"
-                    />
-                    <label class="mb-3 form-check-label" for="allowOverride"
-                        >辞書の上書きを許可する。</label
-                    >
-                </div>
-
-                <!-- TODO: 辞書パスが未入力の場合Disableにする -->
-                <button
-                    type="button"
-                    class="btn btn-primary"
-                    data-bs-toggle="modal"
-                    data-bs-target="#dictSubmitModal"
-                >
-                    インポート
-                </button>
-
-                <div
-                    class="modal fade"
-                    id="submitModal"
-                    tabindex="-1"
-                    aria-labelledby="submitModalLabel"
-                    aria-hidden="true"
-                >
-                    <div class="modal-dialog">
-                        <div class="modal-content">
-                            <div class="modal-header">
-                                <h5 class="modal-title" id="submitModalLabel">
-                                    設定の保存
-                                </h5>
-                                <button
-                                    type="button"
-                                    class="btn-close"
-                                    data-bs-dismiss="modal"
-                                    aria-label="Close"
-                                ></button>
-                            </div>
-                            <div class="modal-body">
-                                設定を保存します。よろしいですか？
-                            </div>
-                            <div class="modal-footer">
-                                <button
-                                    type="button"
-                                    class="btn btn-secondary"
-                                    data-bs-dismiss="modal"
-                                >
-                                    キャンセル
-                                </button>
-                                <button type="submit" class="btn btn-primary">
-                                    保存
-                                </button>
-                            </div>
-                        </div>
-                    </div>
-                </div>
-
-                <div
-                    class="modal fade"
-                    id="dictSubmitModal"
-                    tabindex="-1"
-                    aria-labelledby="dictSubmitModalLabel"
-                    aria-hidden="true"
-                >
-                    <div class="modal-dialog">
-                        <div class="modal-content">
-                            <div class="modal-header">
-                                <h5
-                                    class="modal-title"
-                                    id="dictSubmitModalLabel"
-                                >
-                                    ユーザー辞書のインポート
-                                </h5>
-                                <button
-                                    type="button"
-                                    class="btn-close"
-                                    data-bs-dismiss="modal"
-                                    aria-label="Close"
-                                ></button>
-                            </div>
-                            <div class="modal-body">
-                                ユーザー辞書をインポートします。よろしいですか？
-                            </div>
-                            <div class="modal-footer">
-                                <button
-                                    type="button"
-                                    class="btn btn-secondary"
-                                    data-bs-dismiss="modal"
-                                >
-                                    キャンセル
-                                </button>
-                                <button
-                                    type="button"
-                                    onclick="importUserDict()"
-                                    class="btn btn-primary"
-                                    data-bs-dismiss="modal"
-                                >
-                                    インポート
-                                </button>
-                            </div>
-                        </div>
-                    </div>
-                </div>
-            </form>
+        <div class="form-text">
+          許可するオリジンを指定します。スペースで区切ることで複数指定できます。
+        </div>
+      </div>
+
+      <div class="mb-3">
+        <label class="form-label">ユーザー辞書のインポート</label>
+        <div class="col-12">
+          <button
+            type="button"
+            class="btn btn-primary"
+            data-bs-toggle="modal"
+            data-bs-target="#importUserDictModal"
+          >
+            インポート
+          </button>
         </div>
+      </div>
 
-        <div class="position-fixed bottom-0 end-0 p-3" style="z-index: 5">
-            <div
-                class="toast align-items-center hide text-white bg-success"
-                role="alert"
-                aria-live="assertive"
-                aria-atomic="true"
-                id="toast"
-            >
-                <div class="d-flex">
-                    <div class="toast-body"></div>
-                </div>
+      <div class="mb-3">
+        <label class="form-label">ユーザー辞書のエクスポート</label>
+        <div class="col-12">
+          <a
+            download="VOICEVOXユーザー辞書.json"
+            class="btn btn-primary mb-3"
+            href="/user_dict"
+            @click="showToastWithMessage('辞書をエクスポートしました。');"
+            target="_blank"
+            rel="noopener noreferrer"
+          >
+            エクスポート
+          </a>
+        </div>
+      </div>
+
+      <!-- ユーザー辞書インポート用モーダル -->
+      <div
+        class="modal fade"
+        id="importUserDictModal"
+        tabindex="-1"
+        aria-labelledby="importUserDictModalLabel"
+        aria-hidden="true"
+      >
+        <div class="modal-dialog">
+          <div class="modal-content">
+            <div class="modal-header">
+              <h5 class="modal-title" id="importUserDictModalLabel">
+                ユーザー辞書のインポート
+              </h5>
+              <button
+                type="button"
+                class="btn-close"
+                data-bs-dismiss="modal"
+                aria-label="Close"
+              ></button>
+            </div>
+            <div class="modal-body">
+              <input
+                class="form-control"
+                type="file"
+                accept="application/json"
+                @change="(e) => { userDictFileForImport = e.target.files[0]; }"
+              />
             </div>
+            <div class="modal-footer">
+              <button
+                type="button"
+                class="btn btn-secondary"
+                data-bs-dismiss="modal"
+              >
+                キャンセル
+              </button>
+              <button
+                type="button"
+                @click="importUserDict"
+                class="btn btn-primary"
+                data-bs-dismiss="modal"
+                :disabled="userDictFileForImport == undefined"
+              >
+                インポート
+              </button>
+            </div>
+          </div>
         </div>
-        <script>
-            const reader = new FileReader();
+      </div>
 
-            const allowOverrideElement =
-                document.getElementById("allowOverride");
+      <!-- トースト -->
+      <div class="position-fixed bottom-0 end-0 p-3" style="z-index: 5">
+        <div
+          class="toast align-items-center autohide text-white bg-success"
+          role="alert"
+          aria-live="assertive"
+          aria-atomic="true"
+          ref="toastElem"
+        >
+          <div class="d-flex">
+            <div class="toast-body">{{toastMessage}}</div>
+          </div>
+        </div>
+      </div>
+    </div>
 
-            const toastElement = document.getElementById("toast");
-            const toast = new bootstrap.Toast(toastElement);
-            const toastBody = toastElement.getElementsByClassName("toast-body");
+    <script>
+      // Vueの初期化
+      function initVue() {
+        const { createApp, ref, watch, onMounted } = Vue;
+        createApp({
+          setup() {
+            // 設定値周り
+            const corsPolicyMode = ref(
+              "<JINJA_PRE>cors_policy_mode<JINJA_POST>"
+            );
+            const allowOrigin = ref("<JINJA_PRE>allow_origin<JINJA_POST>");
 
-            const showToastWithMessage = (message) => {
-                toast.show();
-                toastBody[0].innerHTML = message;
-            };
+            // 設定が変更されたら自動保存
+            watch([corsPolicyMode, allowOrigin], () => {
+              const formData = new FormData();
+              formData.append("cors_policy_mode", corsPolicyMode.value);
+              formData.append("allow_origin", allowOrigin.value);
 
-            // 読み込み時にメッセージがあれば表示する
-            var msg = "{{message}}";
-            if (msg) {
-                showToastWithMessage(msg);
-            }
+              fetch("/setting", {
+                method: "POST",
+                mode: "same-origin",
+                body: formData,
+              }).then((res) => {
+                if (res.ok) {
+                  showToastWithMessage("設定を保存しました。");
+                } else {
+                  showToastWithMessage("設定の保存に失敗しました。");
+                }
+              });
+            });
 
-            reader.addEventListener("load", async () => {
-                const params = {
-                    override: allowOverrideElement.checked ? true : false,
-                };
-                const query_params = new URLSearchParams(params);
-
-                await fetch(`/import_user_dict?${query_params}`, {
-                    method: "POST",
-                    mode: "same-origin",
-                    headers: { "Content-Type": "application/json" },
-                    body: reader.result,
+            // ユーザー辞書周り
+            const userDictFileForImport = ref();
+
+            const importUserDict = () => {
+              if (userDictFileForImport.value == undefined) {
+                throw new Error("userDictFileForImportが見つかりません。");
+              }
+
+              const reader = new FileReader();
+              reader.addEventListener("load", async () => {
+                const params = new URLSearchParams({
+                  override: true, // 重複するエントリを上書きする
+                });
+                await fetch(`/import_user_dict?${params}`, {
+                  method: "POST",
+                  mode: "same-origin",
+                  headers: { "Content-Type": "application/json" },
+                  body: reader.result,
                 });
 
                 showToastWithMessage("辞書をインポートしました。");
+              });
+
+              reader.readAsText(userDictFileForImport.value);
+            };
+
+            // トースト
+            const toastElem = ref(undefined);
+            const bootstrapToast = ref(undefined);
+            const toastMessage = ref("");
+            onMounted(() => {
+              if (toastElem.value == undefined) {
+                throw new Error("toastElemが見つかりません。");
+              }
+              bootstrapToast.value = new bootstrap.Toast(toastElem.value);
             });
+            const showToastWithMessage = (message) => {
+              console.log(`showToastWithMessage: ${message}`);
+              bootstrapToast.value.show();
+              toastMessage.value = message;
+            };
 
-            const importUserDict = () => {
-                const userDictFile =
-                    document.getElementById("userDictFile").files[0];
-                reader.readAsText(userDictFile);
+            // 表示用の情報
+            const brandName = ref("<JINJA_PRE>brand_name<JINJA_POST>");
+
+            // Vueの準備が完了したら表示・非表示を切り替える
+            onMounted(() => {
+              document.getElementById("before-init").style.display = "none";
+              document.getElementById("app").style.display = "block";
+            });
+
+            return {
+              corsPolicyMode,
+              allowOrigin,
+              userDictFileForImport,
+              importUserDict,
+              toastElem,
+              toastMessage,
+              showToastWithMessage,
+              brandName,
             };
-        </script>
-    </body>
+          },
+        }).mount("#app");
+      }
+
+      /**
+       * CDNからscriptやCSSを読み込む。
+       * CDNが使えないときのために複数の候補を試す。
+       */
+      const loadCDN = async (scriptOrCss, candidateUrlList, integrity) => {
+        if (scriptOrCss !== "script" && scriptOrCss !== "css") {
+          throw new Error("scriptOrCssはscriptかcssを指定してください。");
+        }
+
+        let current = 0;
+        await new Promise((resolve, reject) => {
+          const loadNext = async () => {
+            if (current >= candidateUrlList.length) {
+              reject(new Error("全てのCDNで読み込みに失敗しました。"));
+              return;
+            }
+
+            let elem;
+            if (scriptOrCss === "script") {
+              elem = document.createElement("script");
+              elem.src = candidateUrlList[current];
+            } else {
+              elem = document.createElement("link");
+              elem.href = candidateUrlList[current];
+              elem.rel = "stylesheet";
+            }
+            elem.integrity = integrity;
+            elem.crossOrigin = "anonymous";
+            elem.onload = resolve;
+            elem.onerror = () => {
+              console.warn(
+                `CDNの読み込みに失敗しました。 ${candidateUrlList[current]}`
+              );
+              document.head.removeChild(elem);
+              current++;
+              loadNext();
+            };
+            document.head.appendChild(elem);
+          };
+          loadNext();
+        });
+      };
+
+      // 初期化用の関数
+      const init = async () => {
+        // ライブラリ読み込み用のPromiseリスト
+        const libraryLoadingPromises = [];
+
+        // Bootstrapを読み込む
+        const bootstrapCssPromise = loadCDN(
+          "css",
+          [
+            "https://unpkg.com/bootstrap@5.0.2/dist/css/bootstrap.min.css",
+            "https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css",
+            "https://cdnjs.cloudflare.com/ajax/libs/bootstrap/5.0.2/css/bootstrap.min.css",
+          ],
+          "sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC"
+        );
+        libraryLoadingPromises.push(bootstrapCssPromise);
+
+        const bootstrapScriptPromise = loadCDN(
+          "script",
+          [
+            "https://unpkg.com/bootstrap@5.0.2/dist/js/bootstrap.bundle.min.js",
+            "https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/js/bootstrap.bundle.min.js",
+            "https://cdnjs.cloudflare.com/ajax/libs/bootstrap/5.0.2/js/bootstrap.bundle.min.js",
+          ],
+          "sha384-MrcW6ZMFYlzcLA8Nl+NtUVF0sA7MsXsP1UyJoMp4YLEuNSfAP+JcXn/tWtIaxVXM"
+        );
+        libraryLoadingPromises.push(bootstrapScriptPromise);
+
+        // Vueを読み込む
+        const vuePromise = loadCDN(
+          "script",
+          [
+            "https://unpkg.com/vue@3.3.10/dist/vue.global.js",
+            "https://cdn.jsdelivr.net/npm/vue@3.3.10/dist/vue.global.js",
+            "https://cdnjs.cloudflare.com/ajax/libs/vue/3.3.10/vue.global.js",
+          ],
+          "sha384-ttfhgYK68lNlS8ak6Z//mvUbpRbRCh43MYGuqEtK8mj/yzlKqY8GA8o3BPMi23cE"
+        );
+        libraryLoadingPromises.push(vuePromise);
+
+        // ライブラリの読み込みが完了したらVueを初期化
+        await Promise.all(libraryLoadingPromises);
+        initVue();
+      };
+      init();
+    </script>
+  </body>
 </html>

From 4513a79942db4c6584211a04ba31586caa7adb6d Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Tue, 9 Jan 2024 02:06:27 +0900
Subject: [PATCH 125/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`tts=5Fpipeline`?=
 =?UTF-8?q?=20=E3=83=86=E3=82=B9=E3=83=88=E3=83=87=E3=82=A3=E3=83=AC?=
 =?UTF-8?q?=E3=82=AF=E3=83=88=E3=83=AA=20(#986)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

refactor: `tts_pipeline` テストディレクトリ
---
 test/tts_pipeline/__init__.py                                     | 0
 .../test_tts_engine/test_mocked_update_length_output.json         | 0
 test/{ => tts_pipeline}/test_acoustic_feature_extractor.py        | 0
 test/{ => tts_pipeline}/test_kana_converter.py                    | 0
 test/{ => tts_pipeline}/test_mora_list.py                         | 0
 test/{ => tts_pipeline}/test_mora_to_text.py                      | 0
 test/{ => tts_pipeline}/test_text_analyzer.py                     | 0
 test/{ => tts_pipeline}/test_tts_engine.py                        | 0
 test/{ => tts_pipeline}/test_tts_engine_base.py                   | 0
 9 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 test/tts_pipeline/__init__.py
 rename test/{ => tts_pipeline}/__snapshots__/test_tts_engine/test_mocked_update_length_output.json (100%)
 rename test/{ => tts_pipeline}/test_acoustic_feature_extractor.py (100%)
 rename test/{ => tts_pipeline}/test_kana_converter.py (100%)
 rename test/{ => tts_pipeline}/test_mora_list.py (100%)
 rename test/{ => tts_pipeline}/test_mora_to_text.py (100%)
 rename test/{ => tts_pipeline}/test_text_analyzer.py (100%)
 rename test/{ => tts_pipeline}/test_tts_engine.py (100%)
 rename test/{ => tts_pipeline}/test_tts_engine_base.py (100%)

diff --git a/test/tts_pipeline/__init__.py b/test/tts_pipeline/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/__snapshots__/test_tts_engine/test_mocked_update_length_output.json b/test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_update_length_output.json
similarity index 100%
rename from test/__snapshots__/test_tts_engine/test_mocked_update_length_output.json
rename to test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_update_length_output.json
diff --git a/test/test_acoustic_feature_extractor.py b/test/tts_pipeline/test_acoustic_feature_extractor.py
similarity index 100%
rename from test/test_acoustic_feature_extractor.py
rename to test/tts_pipeline/test_acoustic_feature_extractor.py
diff --git a/test/test_kana_converter.py b/test/tts_pipeline/test_kana_converter.py
similarity index 100%
rename from test/test_kana_converter.py
rename to test/tts_pipeline/test_kana_converter.py
diff --git a/test/test_mora_list.py b/test/tts_pipeline/test_mora_list.py
similarity index 100%
rename from test/test_mora_list.py
rename to test/tts_pipeline/test_mora_list.py
diff --git a/test/test_mora_to_text.py b/test/tts_pipeline/test_mora_to_text.py
similarity index 100%
rename from test/test_mora_to_text.py
rename to test/tts_pipeline/test_mora_to_text.py
diff --git a/test/test_text_analyzer.py b/test/tts_pipeline/test_text_analyzer.py
similarity index 100%
rename from test/test_text_analyzer.py
rename to test/tts_pipeline/test_text_analyzer.py
diff --git a/test/test_tts_engine.py b/test/tts_pipeline/test_tts_engine.py
similarity index 100%
rename from test/test_tts_engine.py
rename to test/tts_pipeline/test_tts_engine.py
diff --git a/test/test_tts_engine_base.py b/test/tts_pipeline/test_tts_engine_base.py
similarity index 100%
rename from test/test_tts_engine_base.py
rename to test/tts_pipeline/test_tts_engine_base.py

From d8393c9ed7e54f8634583b555a205912c2435e34 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Tue, 9 Jan 2024 07:52:26 +0900
Subject: [PATCH 126/177] =?UTF-8?q?=E5=BB=83=E6=AD=A2:=20`pre=5Fprocess()`?=
 =?UTF-8?q?=20(#976)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

remove: `pre_process()`
---
 test/tts_pipeline/test_tts_engine.py       | 37 ----------------------
 voicevox_engine/tts_pipeline/tts_engine.py | 17 ++--------
 2 files changed, 3 insertions(+), 51 deletions(-)

diff --git a/test/tts_pipeline/test_tts_engine.py b/test/tts_pipeline/test_tts_engine.py
index 5dc7e7275..f4136b3c6 100644
--- a/test/tts_pipeline/test_tts_engine.py
+++ b/test/tts_pipeline/test_tts_engine.py
@@ -23,7 +23,6 @@
     apply_speed_scale,
     apply_volume_scale,
     count_frame_per_unit,
-    pre_process,
     query_to_decoder_feature,
     raw_wave_to_output_wave,
     split_mora,
@@ -530,42 +529,6 @@ def test_to_flatten_moras(self):
             + true_accent_phrases_hello_hiho[1].moras,
         )
 
-    def test_pre_process(self):
-        flatten_moras, phoneme_data_list = pre_process(_gen_hello_hiho_accent_phrases())
-
-        mora_index = 0
-        phoneme_index = 1
-
-        self.assertTrue(is_same_phoneme(phoneme_data_list[0], Phoneme("pau")))
-        for accent_phrase in _gen_hello_hiho_accent_phrases():
-            moras = accent_phrase.moras
-            for mora in moras:
-                self.assertEqual(flatten_moras[mora_index], mora)
-                mora_index += 1
-                if mora.consonant is not None:
-                    self.assertTrue(
-                        is_same_phoneme(
-                            phoneme_data_list[phoneme_index], Phoneme(mora.consonant)
-                        )
-                    )
-                    phoneme_index += 1
-                self.assertTrue(
-                    is_same_phoneme(
-                        phoneme_data_list[phoneme_index], Phoneme(mora.vowel)
-                    )
-                )
-                phoneme_index += 1
-            if accent_phrase.pause_mora:
-                self.assertEqual(flatten_moras[mora_index], accent_phrase.pause_mora)
-                mora_index += 1
-                self.assertTrue(
-                    is_same_phoneme(phoneme_data_list[phoneme_index], Phoneme("pau"))
-                )
-                phoneme_index += 1
-        self.assertTrue(
-            is_same_phoneme(phoneme_data_list[phoneme_index], Phoneme("pau"))
-        )
-
     def test_update_length(self):
         # Inputs
         hello_hiho = _gen_hello_hiho_accent_phrases()
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index 69692ec40..03589fcc4 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -69,19 +69,6 @@ def split_mora(phonemes: list[Phoneme]) -> tuple[list[Phoneme | None], list[Phon
     return consonants, vowels
 
 
-def pre_process(
-    accent_phrases: list[AccentPhrase],
-) -> tuple[list[Mora], list[Phoneme]]:
-    """アクセント句系列から（前後の無音含まない）モーラ系列と（前後の無音含む）音素系列を抽出する"""
-    flatten_moras = to_flatten_moras(accent_phrases)
-    phonemes = to_flatten_phonemes(flatten_moras)
-
-    # 前後無音の追加
-    phonemes = [Phoneme("pau")] + phonemes + [Phoneme("pau")]
-
-    return flatten_moras, phonemes
-
-
 def generate_silence_mora(length: float) -> Mora:
     """無音モーラの生成"""
     return Mora(text="　", vowel="sil", vowel_length=length, pitch=0.0)
@@ -360,7 +347,9 @@ def _create_one_hot(
         end_accent_phrase_list = np.array(end_accent_phrase_list, dtype=np.int64)
 
         # アクセント句系列から（前後の無音含まない）モーラ系列と（前後の無音含む）音素系列を抽出する
-        moras, phonemes = pre_process(accent_phrases)
+        moras = to_flatten_moras(accent_phrases)
+        phonemes = to_flatten_phonemes(moras)
+        phonemes = [Phoneme("pau")] + phonemes + [Phoneme("pau")]
 
         # 前後無音付加済みの音素系列から子音ID系列・母音ID系列を抽出する
         consonants, vowels = split_mora(phonemes)

From eac877922bdcae9f453856747bc35ae87ae85814 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Tue, 9 Jan 2024 08:05:54 +0900
Subject: [PATCH 127/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`=5Fcreate=5Fone?=
 =?UTF-8?q?=5Fhot()`=20(#990)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: `update_pitch` キャスト簡略化

* refactor: `_create_one_hot()`

* refactor: util 移設

---------

Co-authored-by: Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp>
---
 voicevox_engine/tts_pipeline/tts_engine.py | 45 ++++++----------------
 1 file changed, 11 insertions(+), 34 deletions(-)

diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index 03589fcc4..d2b9adc91 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -69,6 +69,17 @@ def split_mora(phonemes: list[Phoneme]) -> tuple[list[Phoneme | None], list[Phon
     return consonants, vowels
 
 
+def _create_one_hot(accent_phrase: AccentPhrase, index: int) -> NDArray[np.int64]:
+    """
+    アクセント句から指定インデックスのみが 1 の配列 (onehot) を生成する。
+    長さ `len(moras)` な配列の指定インデックスを 1 とし、pause_mora を含む場合は末尾に 0 が付加される。
+    """
+    onehot = np.zeros(len(accent_phrase.moras))
+    onehot[index] = 1
+    onehot = np.append(onehot, [0] if accent_phrase.pause_mora else [])
+    return onehot.astype(np.int64)
+
+
 def generate_silence_mora(length: float) -> Mora:
     """無音モーラの生成"""
     return Mora(text="　", vowel="sil", vowel_length=length, pitch=0.0)
@@ -280,34 +291,6 @@ def update_pitch(
         if len(accent_phrases) == 0:
             return []
 
-        # accent
-        def _create_one_hot(
-            accent_phrase: AccentPhrase, position: int
-        ) -> NDArray[np.floating]:
-            """
-            単位行列(np.eye)を応用し、accent_phrase内でone hotな配列(リスト)を作る
-            例えば、accent_phraseのmorasの長さが12、positionが1なら
-            [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
-            morasの長さが同じく12、positionが-1なら
-            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]
-            のような配列を生成する
-            accent_phraseがpause_moraを含む場合はさらに後ろに0が足される
-            Parameters
-            ----------
-            accent_phrase : AccentPhrase
-                アクセント句モデル
-            position : int
-                one hotにするindex
-            Returns
-            -------
-            one_hot : NDArray[np.floating]
-                one hotな配列(リスト)
-            """
-            return np.r_[
-                np.eye(len(accent_phrase.moras))[position],
-                (0 if accent_phrase.pause_mora is not None else []),
-            ]
-
         # アクセントの開始/終了位置リストを作る
         start_accent_list = np.concatenate(
             [
@@ -340,12 +323,6 @@ def _create_one_hot(
         start_accent_phrase_list = np.r_[0, start_accent_phrase_list, 0]
         end_accent_phrase_list = np.r_[0, end_accent_phrase_list, 0]
 
-        # キャスト
-        start_accent_list = np.array(start_accent_list, dtype=np.int64)
-        end_accent_list = np.array(end_accent_list, dtype=np.int64)
-        start_accent_phrase_list = np.array(start_accent_phrase_list, dtype=np.int64)
-        end_accent_phrase_list = np.array(end_accent_phrase_list, dtype=np.int64)
-
         # アクセント句系列から（前後の無音含まない）モーラ系列と（前後の無音含む）音素系列を抽出する
         moras = to_flatten_moras(accent_phrases)
         phonemes = to_flatten_phonemes(moras)

From 39468d41bedae147b5946363fd3d85ce1a19b6e0 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Tue, 9 Jan 2024 08:09:37 +0900
Subject: [PATCH 128/177] =?UTF-8?q?=E5=BB=83=E6=AD=A2:=20TTS=20=E9=96=A2?=
 =?UTF-8?q?=E9=80=A3=20`=5F=5Finit=5F=5F.py`=20(#987)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* remove: __init__.py `make_tts_engines_from_cores`

* remove: part of __init__.py `TTSEngine`

* remove: __init__.py `load_runtime_lib`

* remove: __init__.py `CoreWrapper`

* remove: `tts_pipeline.__init__`
---
 run.py                                    | 5 ++++-
 test/e2e/conftest.py                      | 2 +-
 test/tts_pipeline/test_tts_engine.py      | 2 +-
 test/tts_pipeline/test_tts_engine_base.py | 6 +++---
 voicevox_engine/cancellable_engine.py     | 2 +-
 voicevox_engine/dev/tts_engine/mock.py    | 3 +--
 voicevox_engine/morphing.py               | 2 +-
 voicevox_engine/tts_pipeline/__init__.py  | 9 ---------
 8 files changed, 12 insertions(+), 19 deletions(-)
 delete mode 100644 voicevox_engine/tts_pipeline/__init__.py

diff --git a/run.py b/run.py
index 97123ad60..7041028cb 100644
--- a/run.py
+++ b/run.py
@@ -70,8 +70,11 @@
     Setting,
     SettingLoader,
 )
-from voicevox_engine.tts_pipeline import TTSEngine, make_tts_engines_from_cores
 from voicevox_engine.tts_pipeline.kana_converter import create_kana, parse_kana
+from voicevox_engine.tts_pipeline.tts_engine import (
+    TTSEngine,
+    make_tts_engines_from_cores,
+)
 from voicevox_engine.user_dict import (
     apply_word,
     delete_word,
diff --git a/test/e2e/conftest.py b/test/e2e/conftest.py
index 50d593f8c..4fbb450b4 100644
--- a/test/e2e/conftest.py
+++ b/test/e2e/conftest.py
@@ -7,7 +7,7 @@
 from voicevox_engine.core_initializer import initialize_cores
 from voicevox_engine.preset import PresetManager
 from voicevox_engine.setting import SettingLoader
-from voicevox_engine.tts_pipeline import make_tts_engines_from_cores
+from voicevox_engine.tts_pipeline.tts_engine import make_tts_engines_from_cores
 from voicevox_engine.utility.core_version_utility import get_latest_core_version
 
 
diff --git a/test/tts_pipeline/test_tts_engine.py b/test/tts_pipeline/test_tts_engine.py
index f4136b3c6..fed0809c1 100644
--- a/test/tts_pipeline/test_tts_engine.py
+++ b/test/tts_pipeline/test_tts_engine.py
@@ -11,10 +11,10 @@
 from voicevox_engine.dev.core.mock import MockCoreWrapper
 from voicevox_engine.metas.Metas import StyleId
 from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
-from voicevox_engine.tts_pipeline import TTSEngine
 from voicevox_engine.tts_pipeline.acoustic_feature_extractor import Phoneme
 from voicevox_engine.tts_pipeline.text_analyzer import text_to_accent_phrases
 from voicevox_engine.tts_pipeline.tts_engine import (
+    TTSEngine,
     apply_intonation_scale,
     apply_output_sampling_rate,
     apply_output_stereo,
diff --git a/test/tts_pipeline/test_tts_engine_base.py b/test/tts_pipeline/test_tts_engine_base.py
index a81e5f301..07f81b416 100644
--- a/test/tts_pipeline/test_tts_engine_base.py
+++ b/test/tts_pipeline/test_tts_engine_base.py
@@ -3,9 +3,9 @@
 from voicevox_engine.dev.core.mock import MockCoreWrapper
 from voicevox_engine.metas.Metas import StyleId
 from voicevox_engine.model import AccentPhrase, Mora
-from voicevox_engine.tts_pipeline import TTSEngine
-from voicevox_engine.tts_pipeline.tts_engine import (
-    apply_interrogative_upspeak,  # FIXME: この関数を使うテストをTTSEngine用のテストに移動する
+from voicevox_engine.tts_pipeline.tts_engine import (  # FIXME: この関数を使うテストをTTSEngine用のテストに移動する
+    TTSEngine,
+    apply_interrogative_upspeak,
 )
 
 
diff --git a/voicevox_engine/cancellable_engine.py b/voicevox_engine/cancellable_engine.py
index 63fb7110e..f3f8200ef 100644
--- a/voicevox_engine/cancellable_engine.py
+++ b/voicevox_engine/cancellable_engine.py
@@ -19,7 +19,7 @@
 from .core_initializer import initialize_cores
 from .metas.Metas import StyleId
 from .model import AudioQuery
-from .tts_pipeline import make_tts_engines_from_cores
+from .tts_pipeline.tts_engine import make_tts_engines_from_cores
 from .utility import get_latest_core_version
 
 
diff --git a/voicevox_engine/dev/tts_engine/mock.py b/voicevox_engine/dev/tts_engine/mock.py
index e85c44aa5..9373cb0ce 100644
--- a/voicevox_engine/dev/tts_engine/mock.py
+++ b/voicevox_engine/dev/tts_engine/mock.py
@@ -9,8 +9,7 @@
 
 from ...metas.Metas import StyleId
 from ...model import AudioQuery
-from ...tts_pipeline import TTSEngine
-from ...tts_pipeline.tts_engine import to_flatten_moras
+from ...tts_pipeline.tts_engine import TTSEngine, to_flatten_moras
 from ..core.mock import MockCoreWrapper
 
 
diff --git a/voicevox_engine/morphing.py b/voicevox_engine/morphing.py
index e5d7cbd7e..92b54c2d1 100644
--- a/voicevox_engine/morphing.py
+++ b/voicevox_engine/morphing.py
@@ -16,7 +16,7 @@
 )
 from .metas.MetasStore import construct_lookup
 from .model import AudioQuery, MorphableTargetInfo, StyleIdNotFoundError
-from .tts_pipeline import TTSEngine
+from .tts_pipeline.tts_engine import TTSEngine
 
 
 @dataclass(frozen=True)
diff --git a/voicevox_engine/tts_pipeline/__init__.py b/voicevox_engine/tts_pipeline/__init__.py
deleted file mode 100644
index d6e066469..000000000
--- a/voicevox_engine/tts_pipeline/__init__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from ..core_wrapper import CoreWrapper, load_runtime_lib
-from .tts_engine import TTSEngine, make_tts_engines_from_cores
-
-__all__ = [
-    "CoreWrapper",
-    "load_runtime_lib",
-    "make_tts_engines_from_cores",
-    "TTSEngine",
-]

From 5bd6a96d578f387c381556ffab7271b08b510068 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Tue, 9 Jan 2024 08:45:44 +0900
Subject: [PATCH 129/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20old=20test=20?=
 =?UTF-8?q?=E3=81=AE=E7=A7=BB=E6=A4=8D=20(#994)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

refactor: old test の移植

Co-authored-by: Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp>
---
 test/tts_pipeline/test_tts_engine.py      | 308 +++++++++++++++++++++
 test/tts_pipeline/test_tts_engine_base.py | 316 ----------------------
 2 files changed, 308 insertions(+), 316 deletions(-)
 delete mode 100644 test/tts_pipeline/test_tts_engine_base.py

diff --git a/test/tts_pipeline/test_tts_engine.py b/test/tts_pipeline/test_tts_engine.py
index fed0809c1..62a34ee6d 100644
--- a/test/tts_pipeline/test_tts_engine.py
+++ b/test/tts_pipeline/test_tts_engine.py
@@ -15,6 +15,7 @@
 from voicevox_engine.tts_pipeline.text_analyzer import text_to_accent_phrases
 from voicevox_engine.tts_pipeline.tts_engine import (
     TTSEngine,
+    apply_interrogative_upspeak,
     apply_intonation_scale,
     apply_output_sampling_rate,
     apply_output_stereo,
@@ -675,3 +676,310 @@ def test_mocked_update_length_output(snapshot_json: JSONSnapshotExtension) -> No
     result = tts_engine.update_length(hello_hiho, StyleId(1))
     # Tests
     assert snapshot_json == json.loads(json.dumps(result, default=pydantic_encoder))
+
+
+def koreha_arimasuka_base_expected():
+    return [
+        AccentPhrase(
+            moras=[
+                Mora(
+                    text="コ",
+                    consonant="k",
+                    consonant_length=2.44,
+                    vowel="o",
+                    vowel_length=2.88,
+                    pitch=4.38,
+                ),
+                Mora(
+                    text="レ",
+                    consonant="r",
+                    consonant_length=3.06,
+                    vowel="e",
+                    vowel_length=1.88,
+                    pitch=4.0,
+                ),
+                Mora(
+                    text="ワ",
+                    consonant="w",
+                    consonant_length=3.62,
+                    vowel="a",
+                    vowel_length=1.44,
+                    pitch=4.19,
+                ),
+            ],
+            accent=3,
+            pause_mora=None,
+            is_interrogative=False,
+        ),
+        AccentPhrase(
+            moras=[
+                Mora(
+                    text="ア",
+                    consonant=None,
+                    consonant_length=None,
+                    vowel="a",
+                    vowel_length=1.44,
+                    pitch=1.44,
+                ),
+                Mora(
+                    text="リ",
+                    consonant="r",
+                    consonant_length=3.06,
+                    vowel="i",
+                    vowel_length=2.31,
+                    pitch=4.44,
+                ),
+                Mora(
+                    text="マ",
+                    consonant="m",
+                    consonant_length=2.62,
+                    vowel="a",
+                    vowel_length=1.44,
+                    pitch=3.12,
+                ),
+                Mora(
+                    text="ス",
+                    consonant="s",
+                    consonant_length=3.19,
+                    vowel="U",
+                    vowel_length=1.38,
+                    pitch=0.0,
+                ),
+                Mora(
+                    text="カ",
+                    consonant="k",
+                    consonant_length=2.44,
+                    vowel="a",
+                    vowel_length=1.44,
+                    pitch=2.94,
+                ),
+            ],
+            accent=3,
+            pause_mora=None,
+            is_interrogative=False,
+        ),
+    ]
+
+
+class TestTTSEngineBase(TestCase):
+    def setUp(self):
+        super().setUp()
+        self.tts_engine = TTSEngine(core=MockCoreWrapper())
+
+    def create_synthesis_test_base(
+        self,
+        text: str,
+        expected: list[AccentPhrase],
+        enable_interrogative_upspeak: bool,
+    ) -> None:
+        """音声合成時に疑問文モーラ処理を行っているかどうかを検証
+        (https://github.com/VOICEVOX/voicevox_engine/issues/272#issuecomment-1022610866)
+        """
+        inputs = self.tts_engine.create_accent_phrases(text, StyleId(1))
+        outputs = apply_interrogative_upspeak(inputs, enable_interrogative_upspeak)
+        self.assertEqual(expected, outputs, f"case(text:{text})")
+
+    def test_create_accent_phrases(self):
+        """accent_phrasesの作成時では疑問文モーラ処理を行わない
+        (https://github.com/VOICEVOX/voicevox_engine/issues/272#issuecomment-1022610866)
+        """
+        text = "これはありますか？"
+        expected = koreha_arimasuka_base_expected()
+        expected[-1].is_interrogative = True
+        actual = self.tts_engine.create_accent_phrases(text, StyleId(1))
+        self.assertEqual(expected, actual, f"case(text:{text})")
+
+    def test_upspeak_voiced_last_mora(self):
+        # voiced + "？" + flagON -> upspeak
+        expected = koreha_arimasuka_base_expected()
+        expected[-1].is_interrogative = True
+        expected[-1].moras += [
+            Mora(
+                text="ア",
+                consonant=None,
+                consonant_length=None,
+                vowel="a",
+                vowel_length=0.15,
+                pitch=expected[-1].moras[-1].pitch + 0.3,
+            )
+        ]
+        self.create_synthesis_test_base(
+            text="これはありますか？",
+            expected=expected,
+            enable_interrogative_upspeak=True,
+        )
+
+        # voiced + "？" + flagOFF -> non-upspeak
+        expected = koreha_arimasuka_base_expected()
+        expected[-1].is_interrogative = True
+        self.create_synthesis_test_base(
+            text="これはありますか？",
+            expected=expected,
+            enable_interrogative_upspeak=False,
+        )
+
+        # voiced + "" + flagON -> non-upspeak
+        expected = koreha_arimasuka_base_expected()
+        self.create_synthesis_test_base(
+            text="これはありますか",
+            expected=expected,
+            enable_interrogative_upspeak=True,
+        )
+
+    def test_upspeak_voiced_N_last_mora(self):
+        def nn_base_expected():
+            return [
+                AccentPhrase(
+                    moras=[
+                        Mora(
+                            text="ン",
+                            consonant=None,
+                            consonant_length=None,
+                            vowel="N",
+                            vowel_length=1.25,
+                            pitch=1.44,
+                        )
+                    ],
+                    accent=1,
+                    pause_mora=None,
+                    is_interrogative=False,
+                )
+            ]
+
+        # voiced + "" + flagON -> non-upspeak
+        expected = nn_base_expected()
+        self.create_synthesis_test_base(
+            text="ん",
+            expected=expected,
+            enable_interrogative_upspeak=True,
+        )
+
+        # voiced + "？" + flagON -> upspeak
+        expected = nn_base_expected()
+        expected[-1].is_interrogative = True
+        expected[-1].moras += [
+            Mora(
+                text="ン",
+                consonant=None,
+                consonant_length=None,
+                vowel="N",
+                vowel_length=0.15,
+                pitch=expected[-1].moras[-1].pitch + 0.3,
+            )
+        ]
+        self.create_synthesis_test_base(
+            text="ん？",
+            expected=expected,
+            enable_interrogative_upspeak=True,
+        )
+
+        # voiced + "？" + flagOFF -> non-upspeak
+        expected = nn_base_expected()
+        expected[-1].is_interrogative = True
+        self.create_synthesis_test_base(
+            text="ん？",
+            expected=expected,
+            enable_interrogative_upspeak=False,
+        )
+
+    def test_upspeak_unvoiced_last_mora(self):
+        def ltu_base_expected():
+            return [
+                AccentPhrase(
+                    moras=[
+                        Mora(
+                            text="ッ",
+                            consonant=None,
+                            consonant_length=None,
+                            vowel="cl",
+                            vowel_length=1.69,
+                            pitch=0.0,
+                        )
+                    ],
+                    accent=1,
+                    pause_mora=None,
+                    is_interrogative=False,
+                )
+            ]
+
+        # unvoiced + "" + flagON -> non-upspeak
+        expected = ltu_base_expected()
+        self.create_synthesis_test_base(
+            text="っ",
+            expected=expected,
+            enable_interrogative_upspeak=True,
+        )
+
+        # unvoiced + "？" + flagON -> non-upspeak
+        expected = ltu_base_expected()
+        expected[-1].is_interrogative = True
+        self.create_synthesis_test_base(
+            text="っ？",
+            expected=expected,
+            enable_interrogative_upspeak=True,
+        )
+
+        # unvoiced + "？" + flagOFF -> non-upspeak
+        expected = ltu_base_expected()
+        expected[-1].is_interrogative = True
+        self.create_synthesis_test_base(
+            text="っ？",
+            expected=expected,
+            enable_interrogative_upspeak=False,
+        )
+
+    def test_upspeak_voiced_u_last_mora(self):
+        def su_base_expected():
+            return [
+                AccentPhrase(
+                    moras=[
+                        Mora(
+                            text="ス",
+                            consonant="s",
+                            consonant_length=3.19,
+                            vowel="u",
+                            vowel_length=3.5,
+                            pitch=5.94,
+                        )
+                    ],
+                    accent=1,
+                    pause_mora=None,
+                    is_interrogative=False,
+                )
+            ]
+
+        # voiced + "" + flagON -> non-upspeak
+        expected = su_base_expected()
+        self.create_synthesis_test_base(
+            text="す",
+            expected=expected,
+            enable_interrogative_upspeak=True,
+        )
+
+        # voiced + "？" + flagON -> upspeak
+        expected = su_base_expected()
+        expected[-1].is_interrogative = True
+        expected[-1].moras += [
+            Mora(
+                text="ウ",
+                consonant=None,
+                consonant_length=None,
+                vowel="u",
+                vowel_length=0.15,
+                pitch=expected[-1].moras[-1].pitch + 0.3,
+            )
+        ]
+        self.create_synthesis_test_base(
+            text="す？",
+            expected=expected,
+            enable_interrogative_upspeak=True,
+        )
+
+        # voiced + "？" + flagOFF -> non-upspeak
+        expected = su_base_expected()
+        expected[-1].is_interrogative = True
+        self.create_synthesis_test_base(
+            text="す？",
+            expected=expected,
+            enable_interrogative_upspeak=False,
+        )
diff --git a/test/tts_pipeline/test_tts_engine_base.py b/test/tts_pipeline/test_tts_engine_base.py
deleted file mode 100644
index 07f81b416..000000000
--- a/test/tts_pipeline/test_tts_engine_base.py
+++ /dev/null
@@ -1,316 +0,0 @@
-from unittest import TestCase
-
-from voicevox_engine.dev.core.mock import MockCoreWrapper
-from voicevox_engine.metas.Metas import StyleId
-from voicevox_engine.model import AccentPhrase, Mora
-from voicevox_engine.tts_pipeline.tts_engine import (  # FIXME: この関数を使うテストをTTSEngine用のテストに移動する
-    TTSEngine,
-    apply_interrogative_upspeak,
-)
-
-
-def koreha_arimasuka_base_expected():
-    return [
-        AccentPhrase(
-            moras=[
-                Mora(
-                    text="コ",
-                    consonant="k",
-                    consonant_length=2.44,
-                    vowel="o",
-                    vowel_length=2.88,
-                    pitch=4.38,
-                ),
-                Mora(
-                    text="レ",
-                    consonant="r",
-                    consonant_length=3.06,
-                    vowel="e",
-                    vowel_length=1.88,
-                    pitch=4.0,
-                ),
-                Mora(
-                    text="ワ",
-                    consonant="w",
-                    consonant_length=3.62,
-                    vowel="a",
-                    vowel_length=1.44,
-                    pitch=4.19,
-                ),
-            ],
-            accent=3,
-            pause_mora=None,
-            is_interrogative=False,
-        ),
-        AccentPhrase(
-            moras=[
-                Mora(
-                    text="ア",
-                    consonant=None,
-                    consonant_length=None,
-                    vowel="a",
-                    vowel_length=1.44,
-                    pitch=1.44,
-                ),
-                Mora(
-                    text="リ",
-                    consonant="r",
-                    consonant_length=3.06,
-                    vowel="i",
-                    vowel_length=2.31,
-                    pitch=4.44,
-                ),
-                Mora(
-                    text="マ",
-                    consonant="m",
-                    consonant_length=2.62,
-                    vowel="a",
-                    vowel_length=1.44,
-                    pitch=3.12,
-                ),
-                Mora(
-                    text="ス",
-                    consonant="s",
-                    consonant_length=3.19,
-                    vowel="U",
-                    vowel_length=1.38,
-                    pitch=0.0,
-                ),
-                Mora(
-                    text="カ",
-                    consonant="k",
-                    consonant_length=2.44,
-                    vowel="a",
-                    vowel_length=1.44,
-                    pitch=2.94,
-                ),
-            ],
-            accent=3,
-            pause_mora=None,
-            is_interrogative=False,
-        ),
-    ]
-
-
-class TestTTSEngineBase(TestCase):
-    def setUp(self):
-        super().setUp()
-        self.tts_engine = TTSEngine(core=MockCoreWrapper())
-
-    def create_synthesis_test_base(
-        self,
-        text: str,
-        expected: list[AccentPhrase],
-        enable_interrogative_upspeak: bool,
-    ) -> None:
-        """音声合成時に疑問文モーラ処理を行っているかどうかを検証
-        (https://github.com/VOICEVOX/voicevox_engine/issues/272#issuecomment-1022610866)
-        """
-        inputs = self.tts_engine.create_accent_phrases(text, StyleId(1))
-        outputs = apply_interrogative_upspeak(inputs, enable_interrogative_upspeak)
-        self.assertEqual(expected, outputs, f"case(text:{text})")
-
-    def test_create_accent_phrases(self):
-        """accent_phrasesの作成時では疑問文モーラ処理を行わない
-        (https://github.com/VOICEVOX/voicevox_engine/issues/272#issuecomment-1022610866)
-        """
-        text = "これはありますか？"
-        expected = koreha_arimasuka_base_expected()
-        expected[-1].is_interrogative = True
-        actual = self.tts_engine.create_accent_phrases(text, StyleId(1))
-        self.assertEqual(expected, actual, f"case(text:{text})")
-
-    def test_upspeak_voiced_last_mora(self):
-        # voiced + "？" + flagON -> upspeak
-        expected = koreha_arimasuka_base_expected()
-        expected[-1].is_interrogative = True
-        expected[-1].moras += [
-            Mora(
-                text="ア",
-                consonant=None,
-                consonant_length=None,
-                vowel="a",
-                vowel_length=0.15,
-                pitch=expected[-1].moras[-1].pitch + 0.3,
-            )
-        ]
-        self.create_synthesis_test_base(
-            text="これはありますか？",
-            expected=expected,
-            enable_interrogative_upspeak=True,
-        )
-
-        # voiced + "？" + flagOFF -> non-upspeak
-        expected = koreha_arimasuka_base_expected()
-        expected[-1].is_interrogative = True
-        self.create_synthesis_test_base(
-            text="これはありますか？",
-            expected=expected,
-            enable_interrogative_upspeak=False,
-        )
-
-        # voiced + "" + flagON -> non-upspeak
-        expected = koreha_arimasuka_base_expected()
-        self.create_synthesis_test_base(
-            text="これはありますか",
-            expected=expected,
-            enable_interrogative_upspeak=True,
-        )
-
-    def test_upspeak_voiced_N_last_mora(self):
-        def nn_base_expected():
-            return [
-                AccentPhrase(
-                    moras=[
-                        Mora(
-                            text="ン",
-                            consonant=None,
-                            consonant_length=None,
-                            vowel="N",
-                            vowel_length=1.25,
-                            pitch=1.44,
-                        )
-                    ],
-                    accent=1,
-                    pause_mora=None,
-                    is_interrogative=False,
-                )
-            ]
-
-        # voiced + "" + flagON -> upspeak
-        expected = nn_base_expected()
-        self.create_synthesis_test_base(
-            text="ん",
-            expected=expected,
-            enable_interrogative_upspeak=True,
-        )
-
-        # voiced + "？" + flagON -> upspeak
-        expected = nn_base_expected()
-        expected[-1].is_interrogative = True
-        expected[-1].moras += [
-            Mora(
-                text="ン",
-                consonant=None,
-                consonant_length=None,
-                vowel="N",
-                vowel_length=0.15,
-                pitch=expected[-1].moras[-1].pitch + 0.3,
-            )
-        ]
-        self.create_synthesis_test_base(
-            text="ん？",
-            expected=expected,
-            enable_interrogative_upspeak=True,
-        )
-
-        # voiced + "？" + flagOFF -> non-upspeak
-        expected = nn_base_expected()
-        expected[-1].is_interrogative = True
-        self.create_synthesis_test_base(
-            text="ん？",
-            expected=expected,
-            enable_interrogative_upspeak=False,
-        )
-
-    def test_upspeak_unvoiced_last_mora(self):
-        def ltu_base_expected():
-            return [
-                AccentPhrase(
-                    moras=[
-                        Mora(
-                            text="ッ",
-                            consonant=None,
-                            consonant_length=None,
-                            vowel="cl",
-                            vowel_length=1.69,
-                            pitch=0.0,
-                        )
-                    ],
-                    accent=1,
-                    pause_mora=None,
-                    is_interrogative=False,
-                )
-            ]
-
-        # unvoiced + "" + flagON -> non-upspeak
-        expected = ltu_base_expected()
-        self.create_synthesis_test_base(
-            text="っ",
-            expected=expected,
-            enable_interrogative_upspeak=True,
-        )
-
-        # unvoiced + "？" + flagON -> non-upspeak
-        expected = ltu_base_expected()
-        expected[-1].is_interrogative = True
-        self.create_synthesis_test_base(
-            text="っ？",
-            expected=expected,
-            enable_interrogative_upspeak=True,
-        )
-
-        # unvoiced + "？" + flagOFF -> non-upspeak
-        expected = ltu_base_expected()
-        expected[-1].is_interrogative = True
-        self.create_synthesis_test_base(
-            text="っ？",
-            expected=expected,
-            enable_interrogative_upspeak=False,
-        )
-
-    def test_upspeak_voiced_u_last_mora(self):
-        def su_base_expected():
-            return [
-                AccentPhrase(
-                    moras=[
-                        Mora(
-                            text="ス",
-                            consonant="s",
-                            consonant_length=3.19,
-                            vowel="u",
-                            vowel_length=3.5,
-                            pitch=5.94,
-                        )
-                    ],
-                    accent=1,
-                    pause_mora=None,
-                    is_interrogative=False,
-                )
-            ]
-
-        # voiced + "" + flagON -> non-upspeak
-        expected = su_base_expected()
-        self.create_synthesis_test_base(
-            text="す",
-            expected=expected,
-            enable_interrogative_upspeak=True,
-        )
-
-        # voiced + "？" + flagON -> upspeak
-        expected = su_base_expected()
-        expected[-1].is_interrogative = True
-        expected[-1].moras += [
-            Mora(
-                text="ウ",
-                consonant=None,
-                consonant_length=None,
-                vowel="u",
-                vowel_length=0.15,
-                pitch=expected[-1].moras[-1].pitch + 0.3,
-            )
-        ]
-        self.create_synthesis_test_base(
-            text="す？",
-            expected=expected,
-            enable_interrogative_upspeak=True,
-        )
-
-        # voiced + "？" + flagOFF -> non-upspeak
-        expected = su_base_expected()
-        expected[-1].is_interrogative = True
-        self.create_synthesis_test_base(
-            text="す？",
-            expected=expected,
-            enable_interrogative_upspeak=False,
-        )

From abd6153abcc252b67b86f8cefd95ba728c130844 Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Tue, 9 Jan 2024 08:46:31 +0900
Subject: [PATCH 130/177] =?UTF-8?q?pyinstaller=E3=81=AEbootloader=E3=81=AE?=
 =?UTF-8?q?=E8=87=AA=E5=89=8D=E3=83=93=E3=83=AB=E3=83=89=E3=81=AB=E3=81=8A?=
 =?UTF-8?q?=E3=81=BE=E3=81=98=E3=81=AA=E3=81=84=E5=8A=B9=E6=9E=9C=E3=81=8C?=
 =?UTF-8?q?=E3=81=82=E3=82=8B=E3=81=A3=E3=81=BD=E3=81=84=E3=81=93=E3=81=A8?=
 =?UTF-8?q?=E3=82=92=E3=82=B3=E3=83=A1=E3=83=B3=E3=83=88=E8=BF=BD=E8=A8=98?=
 =?UTF-8?q?=E3=81=99=E3=82=8B=20(#980)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 build_util/modify_pyinstaller.bash | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/build_util/modify_pyinstaller.bash b/build_util/modify_pyinstaller.bash
index a38e1656f..26b3e337e 100755
--- a/build_util/modify_pyinstaller.bash
+++ b/build_util/modify_pyinstaller.bash
@@ -4,6 +4,8 @@
 # 良いGPUが自動的に選択されるようにしている
 # https://github.com/VOICEVOX/voicevox_engine/issues/502
 
+# 自前ビルドすることでブートローダーのハッシュ値が変わってウイルス判定を回避する効果もあるかも
+
 set -eux
 
 pyinstaller_version=$(pyinstaller -v)

From f0032143620deb007b398970ddd4e4f74086b4ee Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Tue, 9 Jan 2024 21:57:54 +0900
Subject: [PATCH 131/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`test=5Fmora=5Fto?=
 =?UTF-8?q?=5Ftext.py`=20=E7=A7=BB=E6=A4=8D=20(#996)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

refactor: `test_mora_to_text.py` 移植
---
 test/tts_pipeline/test_mora_to_text.py  | 28 -------------------------
 test/tts_pipeline/test_text_analyzer.py | 26 +++++++++++++++++++++++
 2 files changed, 26 insertions(+), 28 deletions(-)
 delete mode 100644 test/tts_pipeline/test_mora_to_text.py

diff --git a/test/tts_pipeline/test_mora_to_text.py b/test/tts_pipeline/test_mora_to_text.py
deleted file mode 100644
index 8fcc6bd63..000000000
--- a/test/tts_pipeline/test_mora_to_text.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from unittest import TestCase
-
-from voicevox_engine.tts_pipeline.text_analyzer import mora_to_text
-
-
-class TestMoraToText(TestCase):
-    def test_voice(self):
-        self.assertEqual(mora_to_text("a"), "ア")
-        self.assertEqual(mora_to_text("i"), "イ")
-        self.assertEqual(mora_to_text("ka"), "カ")
-        self.assertEqual(mora_to_text("N"), "ン")
-        self.assertEqual(mora_to_text("cl"), "ッ")
-        self.assertEqual(mora_to_text("gye"), "ギェ")
-        self.assertEqual(mora_to_text("ye"), "イェ")
-        self.assertEqual(mora_to_text("wo"), "ウォ")
-
-    def test_unvoice(self):
-        self.assertEqual(mora_to_text("A"), "ア")
-        self.assertEqual(mora_to_text("I"), "イ")
-        self.assertEqual(mora_to_text("kA"), "カ")
-        self.assertEqual(mora_to_text("gyE"), "ギェ")
-        self.assertEqual(mora_to_text("yE"), "イェ")
-        self.assertEqual(mora_to_text("wO"), "ウォ")
-
-    def test_invalid_mora(self):
-        """変なモーラが来ても例外を投げない"""
-        self.assertEqual(mora_to_text("x"), "x")
-        self.assertEqual(mora_to_text(""), "")
diff --git a/test/tts_pipeline/test_text_analyzer.py b/test/tts_pipeline/test_text_analyzer.py
index 2e3bb376a..ebaf30977 100644
--- a/test/tts_pipeline/test_text_analyzer.py
+++ b/test/tts_pipeline/test_text_analyzer.py
@@ -7,6 +7,7 @@
     Label,
     MoraLabel,
     UtteranceLabel,
+    mora_to_text,
     text_to_accent_phrases,
 )
 
@@ -320,6 +321,31 @@ def test_features(self):
         self.assertEqual(features(self.utterance_hello_hiho), self.test_case_hello_hiho)
 
 
+class TestMoraToText(TestCase):
+    def test_voice(self):
+        self.assertEqual(mora_to_text("a"), "ア")
+        self.assertEqual(mora_to_text("i"), "イ")
+        self.assertEqual(mora_to_text("ka"), "カ")
+        self.assertEqual(mora_to_text("N"), "ン")
+        self.assertEqual(mora_to_text("cl"), "ッ")
+        self.assertEqual(mora_to_text("gye"), "ギェ")
+        self.assertEqual(mora_to_text("ye"), "イェ")
+        self.assertEqual(mora_to_text("wo"), "ウォ")
+
+    def test_unvoice(self):
+        self.assertEqual(mora_to_text("A"), "ア")
+        self.assertEqual(mora_to_text("I"), "イ")
+        self.assertEqual(mora_to_text("kA"), "カ")
+        self.assertEqual(mora_to_text("gyE"), "ギェ")
+        self.assertEqual(mora_to_text("yE"), "イェ")
+        self.assertEqual(mora_to_text("wO"), "ウォ")
+
+    def test_invalid_mora(self):
+        """変なモーラが来ても例外を投げない"""
+        self.assertEqual(mora_to_text("x"), "x")
+        self.assertEqual(mora_to_text(""), "")
+
+
 def _gen_mora(text: str, consonant: str | None, vowel: str) -> Mora:
     return Mora(
         text=text,

From 90b282c1672fa2680ed4efe31ce08fec26a57828 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 10 Jan 2024 00:44:45 +0900
Subject: [PATCH 132/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E9=9F=B3?=
 =?UTF-8?q?=E7=B4=A0=E8=AD=98=E5=88=A5=E3=81=AE=E3=83=A1=E3=82=BD=E3=83=83?=
 =?UTF-8?q?=E3=83=89=E5=8C=96=20(#993)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: 音素識別のメソッド化

* fix: モーラ末尾音素リネーム
---
 test/tts_pipeline/test_tts_engine.py                | 12 +++++++-----
 .../tts_pipeline/acoustic_feature_extractor.py      | 11 +++++++++++
 voicevox_engine/tts_pipeline/tts_engine.py          | 13 ++++---------
 3 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/test/tts_pipeline/test_tts_engine.py b/test/tts_pipeline/test_tts_engine.py
index 62a34ee6d..181933161 100644
--- a/test/tts_pipeline/test_tts_engine.py
+++ b/test/tts_pipeline/test_tts_engine.py
@@ -11,7 +11,10 @@
 from voicevox_engine.dev.core.mock import MockCoreWrapper
 from voicevox_engine.metas.Metas import StyleId
 from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
-from voicevox_engine.tts_pipeline.acoustic_feature_extractor import Phoneme
+from voicevox_engine.tts_pipeline.acoustic_feature_extractor import (
+    UNVOICED_MORA_TAIL_PHONEMES,
+    Phoneme,
+)
 from voicevox_engine.tts_pipeline.text_analyzer import text_to_accent_phrases
 from voicevox_engine.tts_pipeline.tts_engine import (
     TTSEngine,
@@ -29,7 +32,6 @@
     split_mora,
     to_flatten_moras,
     to_flatten_phonemes,
-    unvoiced_vowel_likes,
 )
 
 from .test_text_analyzer import stub_unknown_features_koxx
@@ -608,10 +610,10 @@ def test_update_pitch(self):
 
         def result_value(i: int) -> float:
             # unvoiced_vowel_likesのPhoneme ID版
-            unvoiced_vowel_like_ids = [
-                Phoneme(p).phoneme_id for p in unvoiced_vowel_likes
+            unvoiced_mora_tail_ids = [
+                Phoneme(p).phoneme_id for p in UNVOICED_MORA_TAIL_PHONEMES
             ]
-            if vowel_phoneme_list[i] in unvoiced_vowel_like_ids:
+            if vowel_phoneme_list[i] in unvoiced_mora_tail_ids:
                 return 0
             return round(
                 (
diff --git a/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py b/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
index 6c861e16a..76bba5357 100644
--- a/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
+++ b/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
@@ -100,6 +100,9 @@
 # 音素リストの要素数
 _NUM_PHONEME = len(_PHONEME_LIST)
 
+UNVOICED_MORA_TAIL_PHONEMES = ["A", "I", "U", "E", "O", "cl", "pau"]
+MORA_TAIL_PHONEMES = ["a", "i", "u", "e", "o", "N"] + UNVOICED_MORA_TAIL_PHONEMES
+
 
 class Phoneme:
     """音素"""
@@ -127,3 +130,11 @@ def onehot(self) -> NDArray[np.float32]:
         vec = np.zeros(self._NUM_PHONEME, dtype=np.float32)
         vec[self.phoneme_id] = 1.0
         return vec
+
+    def is_mora_tail(self) -> bool:
+        """この音素はモーラ末尾音素（母音・撥音・促音・無音）である"""
+        return self.phoneme in MORA_TAIL_PHONEMES
+
+    def is_unvoiced_mora_tail(self) -> bool:
+        """この音素は無声のモーラ末尾音素（無声母音・促音・無音）である"""
+        return self.phoneme in UNVOICED_MORA_TAIL_PHONEMES
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index d2b9adc91..0fbdc054c 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -14,9 +14,6 @@
 from .mora_list import mora_phonemes_to_mora_kana
 from .text_analyzer import text_to_accent_phrases
 
-unvoiced_vowel_likes = ["A", "I", "U", "E", "O", "cl", "pau"]
-mora_phoneme_list = ["a", "i", "u", "e", "o", "N"] + unvoiced_vowel_likes
-
 # 疑問文語尾定数
 UPSPEAK_LENGTH = 0.15
 UPSPEAK_PITCH_ADD = 0.3
@@ -59,10 +56,10 @@ def split_mora(phonemes: list[Phoneme]) -> tuple[list[Phoneme | None], list[Phon
     consonants: list[Phoneme | None] = []
     vowels: list[Phoneme] = []
     for i, p in enumerate(phonemes):
-        if p.phoneme in mora_phoneme_list:
+        if p.is_mora_tail():
             vowels += [p]
             # Vowel のみのモーラの場合（Vowel が連続する場合）、Consonant を None とする
-            if i == 0 or phonemes[i - 1].phoneme in mora_phoneme_list:
+            if i == 0 or phonemes[i - 1].is_mora_tail():
                 consonants += [None]
         else:
             consonants += [p]
@@ -271,9 +268,7 @@ def update_length(
         phoneme_lengths = self._core.safe_yukarin_s_forward(phoneme_ids, style_id)
 
         # 生成結果でモーラ内の音素長属性を置換する
-        vowel_indexes = [
-            i for i, p in enumerate(phonemes) if p.phoneme in mora_phoneme_list
-        ]
+        vowel_indexes = [i for i, p in enumerate(phonemes) if p.is_mora_tail()]
         for i, mora in enumerate(moras):
             if mora.consonant is None:
                 mora.consonant_length = None
@@ -348,7 +343,7 @@ def update_pitch(
 
         # 母音が無声であるモーラは音高を 0 とする
         for i, p in enumerate(vowels):
-            if p.phoneme in unvoiced_vowel_likes:
+            if p.is_unvoiced_mora_tail():
                 f0[i] = 0
 
         # 更新する

From 1a5c8be7e732906f1ffbe360ad21a8ced47ecba9 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 10 Jan 2024 01:07:22 +0900
Subject: [PATCH 133/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E6=B3=A2?=
 =?UTF-8?q?=E5=BD=A2=E5=90=88=E6=88=90=E3=83=86=E3=82=B9=E3=83=88=E3=81=AE?=
 =?UTF-8?q?=E5=88=86=E9=9B=A2=20(#998)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: 波形合成テストの移植

* fix: lint

---------

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 test/tts_pipeline/test_tts_engine.py       | 306 +------------------
 test/tts_pipeline/test_wave_synthesizer.py | 332 +++++++++++++++++++++
 2 files changed, 333 insertions(+), 305 deletions(-)
 create mode 100644 test/tts_pipeline/test_wave_synthesizer.py

diff --git a/test/tts_pipeline/test_tts_engine.py b/test/tts_pipeline/test_tts_engine.py
index 181933161..8a52ccc45 100644
--- a/test/tts_pipeline/test_tts_engine.py
+++ b/test/tts_pipeline/test_tts_engine.py
@@ -10,7 +10,7 @@
 
 from voicevox_engine.dev.core.mock import MockCoreWrapper
 from voicevox_engine.metas.Metas import StyleId
-from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
+from voicevox_engine.model import AccentPhrase, Mora
 from voicevox_engine.tts_pipeline.acoustic_feature_extractor import (
     UNVOICED_MORA_TAIL_PHONEMES,
     Phoneme,
@@ -19,16 +19,6 @@
 from voicevox_engine.tts_pipeline.tts_engine import (
     TTSEngine,
     apply_interrogative_upspeak,
-    apply_intonation_scale,
-    apply_output_sampling_rate,
-    apply_output_stereo,
-    apply_pitch_scale,
-    apply_prepost_silence,
-    apply_speed_scale,
-    apply_volume_scale,
-    count_frame_per_unit,
-    query_to_decoder_feature,
-    raw_wave_to_output_wave,
     split_mora,
     to_flatten_moras,
     to_flatten_phonemes,
@@ -119,32 +109,6 @@ def is_model_loaded(self, style_id):
         return True
 
 
-def _gen_query(
-    accent_phrases: list[AccentPhrase] | None = None,
-    speedScale: float = 1.0,
-    pitchScale: float = 1.0,
-    intonationScale: float = 1.0,
-    prePhonemeLength: float = 0.0,
-    postPhonemeLength: float = 0.0,
-    volumeScale: float = 1.0,
-    outputSamplingRate: int = 24000,
-    outputStereo: bool = False,
-) -> AudioQuery:
-    """Generate AudioQuery with default meaningless arguments for test simplicity."""
-    accent_phrases = [] if accent_phrases is None else accent_phrases
-    return AudioQuery(
-        accent_phrases=accent_phrases,
-        speedScale=speedScale,
-        pitchScale=pitchScale,
-        intonationScale=intonationScale,
-        prePhonemeLength=prePhonemeLength,
-        postPhonemeLength=postPhonemeLength,
-        volumeScale=volumeScale,
-        outputSamplingRate=outputSamplingRate,
-        outputStereo=outputStereo,
-    )
-
-
 def _gen_mora(
     text: str,
     consonant: str | None,
@@ -182,274 +146,6 @@ def test_to_flatten_phonemes():
     assert true_phonemes == phonemes
 
 
-def test_apply_prepost_silence():
-    """Test `apply_prepost_silence`."""
-    # Inputs
-    query = _gen_query(prePhonemeLength=2 * 0.01067, postPhonemeLength=6 * 0.01067)
-    moras = [
-        _gen_mora("ヒ", "h", 2 * 0.01067, "i", 4 * 0.01067, 100.0),
-    ]
-
-    # Expects
-    true_moras_with_silence = [
-        _gen_mora("　", None, None, "sil", 2 * 0.01067, 0.0),
-        _gen_mora("ヒ", "h", 2 * 0.01067, "i", 4 * 0.01067, 100.0),
-        _gen_mora("　", None, None, "sil", 6 * 0.01067, 0.0),
-    ]
-
-    # Outputs
-    moras_with_silence = apply_prepost_silence(moras, query)
-
-    assert moras_with_silence == true_moras_with_silence
-
-
-def test_apply_speed_scale():
-    """Test `apply_speed_scale`."""
-    # Inputs
-    query = _gen_query(speedScale=2.0)
-    input_moras = [
-        _gen_mora("コ", "k", 2 * 0.01067, "o", 4 * 0.01067, 50.0),
-        _gen_mora("ン", None, None, "N", 4 * 0.01067, 50.0),
-        _gen_mora("、", None, None, "pau", 2 * 0.01067, 0.0),
-        _gen_mora("ヒ", "h", 2 * 0.01067, "i", 4 * 0.01067, 125.0),
-        _gen_mora("ホ", "h", 4 * 0.01067, "O", 2 * 0.01067, 0.0),
-    ]
-
-    # Expects - x2 fast
-    true_moras = [
-        _gen_mora("コ", "k", 1 * 0.01067, "o", 2 * 0.01067, 50.0),
-        _gen_mora("ン", None, None, "N", 2 * 0.01067, 50.0),
-        _gen_mora("、", None, None, "pau", 1 * 0.01067, 0.0),
-        _gen_mora("ヒ", "h", 1 * 0.01067, "i", 2 * 0.01067, 125.0),
-        _gen_mora("ホ", "h", 2 * 0.01067, "O", 1 * 0.01067, 0.0),
-    ]
-
-    # Outputs
-    moras = apply_speed_scale(input_moras, query)
-
-    assert moras == true_moras
-
-
-def test_apply_pitch_scale():
-    """Test `apply_pitch_scale`."""
-    # Inputs
-    query = _gen_query(pitchScale=2.0)
-    input_moras = [
-        _gen_mora("コ", "k", 0.0, "o", 0.0, 50.0),
-        _gen_mora("ン", None, None, "N", 0.0, 50.0),
-        _gen_mora("、", None, None, "pau", 0.0, 0.0),
-        _gen_mora("ヒ", "h", 0.0, "i", 0.0, 125.0),
-        _gen_mora("ホ", "h", 0.0, "O", 0.0, 0.0),
-    ]
-
-    # Expects - x4 value scaled
-    true_moras = [
-        _gen_mora("コ", "k", 0.0, "o", 0.0, 200.0),
-        _gen_mora("ン", None, None, "N", 0.0, 200.0),
-        _gen_mora("、", None, None, "pau", 0.0, 0.0),
-        _gen_mora("ヒ", "h", 0.0, "i", 0.0, 500.0),
-        _gen_mora("ホ", "h", 0.0, "O", 0.0, 0.0),
-    ]
-
-    # Outputs
-    moras = apply_pitch_scale(input_moras, query)
-
-    assert moras == true_moras
-
-
-def test_apply_intonation_scale():
-    """Test `apply_intonation_scale`."""
-    # Inputs
-    query = _gen_query(intonationScale=0.5)
-    input_moras = [
-        _gen_mora("コ", "k", 0.0, "o", 0.0, 200.0),
-        _gen_mora("ン", None, None, "N", 0.0, 200.0),
-        _gen_mora("、", None, None, "pau", 0.0, 0.0),
-        _gen_mora("ヒ", "h", 0.0, "i", 0.0, 500.0),
-        _gen_mora("ホ", "h", 0.0, "O", 0.0, 0.0),
-    ]
-
-    # Expects - mean=300 var x0.5 intonation scaling
-    true_moras = [
-        _gen_mora("コ", "k", 0.0, "o", 0.0, 250.0),
-        _gen_mora("ン", None, None, "N", 0.0, 250.0),
-        _gen_mora("、", None, None, "pau", 0.0, 0.0),
-        _gen_mora("ヒ", "h", 0.0, "i", 0.0, 400.0),
-        _gen_mora("ホ", "h", 0.0, "O", 0.0, 0.0),
-    ]
-
-    # Outputs
-    moras = apply_intonation_scale(input_moras, query)
-
-    assert moras == true_moras
-
-
-def test_apply_volume_scale():
-    """Test `apply_volume_scale`."""
-    # Inputs
-    query = _gen_query(volumeScale=3.0)
-    input_wave = numpy.array([0.0, 1.0, 2.0])
-
-    # Expects - x3 scale
-    true_wave = numpy.array([0.0, 3.0, 6.0])
-
-    # Outputs
-    wave = apply_volume_scale(input_wave, query)
-
-    assert numpy.allclose(wave, true_wave)
-
-
-def test_apply_output_sampling_rate():
-    """Test `apply_output_sampling_rate`."""
-    # Inputs
-    query = _gen_query(outputSamplingRate=12000)
-    input_wave = numpy.array([1.0 for _ in range(120)])
-    input_sr_wave = 24000
-
-    # Expects - half sampling rate
-    true_wave = numpy.array([1.0 for _ in range(60)])
-    assert true_wave.shape == (60,), "Prerequisites"
-
-    # Outputs
-    wave = apply_output_sampling_rate(input_wave, input_sr_wave, query)
-
-    assert wave.shape[0] == true_wave.shape[0]
-
-
-def test_apply_output_stereo():
-    """Test `apply_output_stereo`."""
-    # Inputs
-    query = _gen_query(outputStereo=True)
-    input_wave = numpy.array([1.0, 0.0, 2.0])
-
-    # Expects - Stereo :: (Time, Channel)
-    true_wave = numpy.array([[1.0, 1.0], [0.0, 0.0], [2.0, 2.0]])
-
-    # Outputs
-    wave = apply_output_stereo(input_wave, query)
-
-    assert numpy.array_equal(wave, true_wave)
-
-
-def test_count_frame_per_unit():
-    """Test `count_frame_per_unit`."""
-    # Inputs
-    moras = [
-        _gen_mora("　", None, None, "　", 2 * 0.01067, 0.0),  # 0.01067 [sec/frame]
-        _gen_mora("コ", "k", 2 * 0.01067, "o", 4 * 0.01067, 0.0),
-        _gen_mora("ン", None, None, "N", 4 * 0.01067, 0.0),
-        _gen_mora("、", None, None, "pau", 2 * 0.01067, 0.0),
-        _gen_mora("ヒ", "h", 2 * 0.01067, "i", 4 * 0.01067, 0.0),
-        _gen_mora("ホ", "h", 4 * 0.01067, "O", 2 * 0.01067, 0.0),
-        _gen_mora("　", None, None, "　", 6 * 0.01067, 0.0),
-    ]
-
-    # Expects
-    #                             Pre k  o  N pau h  i  h  O Pst
-    true_frame_per_phoneme_list = [2, 2, 4, 4, 2, 2, 4, 4, 2, 6]
-    true_frame_per_phoneme = numpy.array(true_frame_per_phoneme_list, dtype=numpy.int32)
-    #                         Pre ko  N pau hi hO Pst
-    true_frame_per_mora_list = [2, 6, 4, 2, 6, 6, 6]
-    true_frame_per_mora = numpy.array(true_frame_per_mora_list, dtype=numpy.int32)
-
-    # Outputs
-    frame_per_phoneme, frame_per_mora = count_frame_per_unit(moras)
-
-    assert numpy.array_equal(frame_per_phoneme, true_frame_per_phoneme)
-    assert numpy.array_equal(frame_per_mora, true_frame_per_mora)
-
-
-def test_query_to_decoder_feature():
-    """Test `query_to_decoder_feature`."""
-    # Inputs
-    accent_phrases = [
-        AccentPhrase(
-            moras=[
-                _gen_mora("コ", "k", 2 * 0.01067, "o", 4 * 0.01067, 50.0),
-                _gen_mora("ン", None, None, "N", 4 * 0.01067, 50.0),
-            ],
-            accent=1,
-            pause_mora=_gen_mora("、", None, None, "pau", 2 * 0.01067, 0.0),
-        ),
-        AccentPhrase(
-            moras=[
-                _gen_mora("ヒ", "h", 2 * 0.01067, "i", 4 * 0.01067, 125.0),
-                _gen_mora("ホ", "h", 4 * 0.01067, "O", 2 * 0.01067, 0.0),
-            ],
-            accent=1,
-            pause_mora=None,
-        ),
-    ]
-    query = _gen_query(
-        accent_phrases=accent_phrases,
-        speedScale=2.0,
-        pitchScale=2.0,
-        intonationScale=0.5,
-        prePhonemeLength=2 * 0.01067,
-        postPhonemeLength=6 * 0.01067,
-    )
-
-    # Expects
-    # frame_per_phoneme
-    #                        Pre k  o  N pau h  i  h  O Pst
-    true_frame_per_phoneme = [1, 1, 2, 2, 1, 1, 2, 2, 1, 3]
-    n_frame = sum(true_frame_per_phoneme)
-    # phoneme
-    #                     Pr  k   o   o  N  N pau  h   i   i   h   h  O Pt Pt Pt
-    frame_phoneme_idxs = [0, 23, 30, 30, 4, 4, 0, 19, 21, 21, 19, 19, 5, 0, 0, 0]
-    true_phoneme = numpy.zeros([n_frame, TRUE_NUM_PHONEME], dtype=numpy.float32)
-    for frame_idx, phoneme_idx in enumerate(frame_phoneme_idxs):
-        true_phoneme[frame_idx, phoneme_idx] = 1.0
-    # Pitch
-    #                   paw ko  N pau hi hO paw
-    # frame_per_vowel = [1, 3,  2, 1, 3, 3, 3]
-    #           pau   ko     ko     ko      N      N
-    true1_f0 = [0.0, 250.0, 250.0, 250.0, 250.0, 250.0]
-    #           pau   hi     hi     hi
-    true2_f0 = [0.0, 400.0, 400.0, 400.0]
-    #           hO   hO   hO   paw  paw  paw
-    true3_f0 = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
-    true_f0 = numpy.array(true1_f0 + true2_f0 + true3_f0, dtype=numpy.float32)
-
-    # Outputs
-    phoneme, f0 = query_to_decoder_feature(query)
-
-    assert numpy.array_equal(phoneme, true_phoneme)
-    assert numpy.array_equal(f0, true_f0)
-
-
-def test_raw_wave_to_output_wave_with_resample():
-    """Test `raw_wave_to_output_wave` with resampling option."""
-    # Inputs
-    query = _gen_query(volumeScale=2, outputSamplingRate=48000, outputStereo=True)
-    raw_wave = numpy.random.rand(240)
-    sr_raw_wave = 24000
-
-    # Expects
-    true_wave_shape = (480, 2)
-
-    # Outputs
-    wave = raw_wave_to_output_wave(query, raw_wave, sr_raw_wave)
-
-    assert wave.shape == true_wave_shape
-
-
-def test_raw_wave_to_output_wave_without_resample():
-    """Test `raw_wave_to_output_wave`  without resampling option."""
-    # Inputs
-    query = _gen_query(volumeScale=2, outputStereo=True)
-    raw_wave = numpy.random.rand(240)
-    sr_raw_wave = 24000
-
-    # Expects
-    true_wave = numpy.array([2 * raw_wave, 2 * raw_wave]).T
-
-    # Outputs
-    wave = raw_wave_to_output_wave(query, raw_wave, sr_raw_wave)
-
-    assert numpy.allclose(wave, true_wave)
-
-
 def _gen_hello_hiho_accent_phrases() -> list[AccentPhrase]:
     return [
         AccentPhrase(
diff --git a/test/tts_pipeline/test_wave_synthesizer.py b/test/tts_pipeline/test_wave_synthesizer.py
new file mode 100644
index 000000000..2ece25088
--- /dev/null
+++ b/test/tts_pipeline/test_wave_synthesizer.py
@@ -0,0 +1,332 @@
+"""波形合成のテスト"""
+
+import numpy
+
+from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
+from voicevox_engine.tts_pipeline.tts_engine import (
+    apply_intonation_scale,
+    apply_output_sampling_rate,
+    apply_output_stereo,
+    apply_pitch_scale,
+    apply_prepost_silence,
+    apply_speed_scale,
+    apply_volume_scale,
+    count_frame_per_unit,
+    query_to_decoder_feature,
+    raw_wave_to_output_wave,
+)
+
+TRUE_NUM_PHONEME = 45
+
+
+def _gen_query(
+    accent_phrases: list[AccentPhrase] | None = None,
+    speedScale: float = 1.0,
+    pitchScale: float = 1.0,
+    intonationScale: float = 1.0,
+    prePhonemeLength: float = 0.0,
+    postPhonemeLength: float = 0.0,
+    volumeScale: float = 1.0,
+    outputSamplingRate: int = 24000,
+    outputStereo: bool = False,
+) -> AudioQuery:
+    """Generate AudioQuery with default meaningless arguments for test simplicity."""
+    accent_phrases = [] if accent_phrases is None else accent_phrases
+    return AudioQuery(
+        accent_phrases=accent_phrases,
+        speedScale=speedScale,
+        pitchScale=pitchScale,
+        intonationScale=intonationScale,
+        prePhonemeLength=prePhonemeLength,
+        postPhonemeLength=postPhonemeLength,
+        volumeScale=volumeScale,
+        outputSamplingRate=outputSamplingRate,
+        outputStereo=outputStereo,
+    )
+
+
+def _gen_mora(
+    text: str,
+    consonant: str | None,
+    consonant_length: float | None,
+    vowel: str,
+    vowel_length: float,
+    pitch: float,
+) -> Mora:
+    """Generate Mora with positional arguments for test simplicity."""
+    return Mora(
+        text=text,
+        consonant=consonant,
+        consonant_length=consonant_length,
+        vowel=vowel,
+        vowel_length=vowel_length,
+        pitch=pitch,
+    )
+
+
+def test_apply_prepost_silence():
+    """Test `apply_prepost_silence`."""
+    # Inputs
+    query = _gen_query(prePhonemeLength=2 * 0.01067, postPhonemeLength=6 * 0.01067)
+    moras = [
+        _gen_mora("ヒ", "h", 2 * 0.01067, "i", 4 * 0.01067, 100.0),
+    ]
+
+    # Expects
+    true_moras_with_silence = [
+        _gen_mora("　", None, None, "sil", 2 * 0.01067, 0.0),
+        _gen_mora("ヒ", "h", 2 * 0.01067, "i", 4 * 0.01067, 100.0),
+        _gen_mora("　", None, None, "sil", 6 * 0.01067, 0.0),
+    ]
+
+    # Outputs
+    moras_with_silence = apply_prepost_silence(moras, query)
+
+    assert moras_with_silence == true_moras_with_silence
+
+
+def test_apply_speed_scale():
+    """Test `apply_speed_scale`."""
+    # Inputs
+    query = _gen_query(speedScale=2.0)
+    input_moras = [
+        _gen_mora("コ", "k", 2 * 0.01067, "o", 4 * 0.01067, 50.0),
+        _gen_mora("ン", None, None, "N", 4 * 0.01067, 50.0),
+        _gen_mora("、", None, None, "pau", 2 * 0.01067, 0.0),
+        _gen_mora("ヒ", "h", 2 * 0.01067, "i", 4 * 0.01067, 125.0),
+        _gen_mora("ホ", "h", 4 * 0.01067, "O", 2 * 0.01067, 0.0),
+    ]
+
+    # Expects - x2 fast
+    true_moras = [
+        _gen_mora("コ", "k", 1 * 0.01067, "o", 2 * 0.01067, 50.0),
+        _gen_mora("ン", None, None, "N", 2 * 0.01067, 50.0),
+        _gen_mora("、", None, None, "pau", 1 * 0.01067, 0.0),
+        _gen_mora("ヒ", "h", 1 * 0.01067, "i", 2 * 0.01067, 125.0),
+        _gen_mora("ホ", "h", 2 * 0.01067, "O", 1 * 0.01067, 0.0),
+    ]
+
+    # Outputs
+    moras = apply_speed_scale(input_moras, query)
+
+    assert moras == true_moras
+
+
+def test_apply_pitch_scale():
+    """Test `apply_pitch_scale`."""
+    # Inputs
+    query = _gen_query(pitchScale=2.0)
+    input_moras = [
+        _gen_mora("コ", "k", 0.0, "o", 0.0, 50.0),
+        _gen_mora("ン", None, None, "N", 0.0, 50.0),
+        _gen_mora("、", None, None, "pau", 0.0, 0.0),
+        _gen_mora("ヒ", "h", 0.0, "i", 0.0, 125.0),
+        _gen_mora("ホ", "h", 0.0, "O", 0.0, 0.0),
+    ]
+
+    # Expects - x4 value scaled
+    true_moras = [
+        _gen_mora("コ", "k", 0.0, "o", 0.0, 200.0),
+        _gen_mora("ン", None, None, "N", 0.0, 200.0),
+        _gen_mora("、", None, None, "pau", 0.0, 0.0),
+        _gen_mora("ヒ", "h", 0.0, "i", 0.0, 500.0),
+        _gen_mora("ホ", "h", 0.0, "O", 0.0, 0.0),
+    ]
+
+    # Outputs
+    moras = apply_pitch_scale(input_moras, query)
+
+    assert moras == true_moras
+
+
+def test_apply_intonation_scale():
+    """Test `apply_intonation_scale`."""
+    # Inputs
+    query = _gen_query(intonationScale=0.5)
+    input_moras = [
+        _gen_mora("コ", "k", 0.0, "o", 0.0, 200.0),
+        _gen_mora("ン", None, None, "N", 0.0, 200.0),
+        _gen_mora("、", None, None, "pau", 0.0, 0.0),
+        _gen_mora("ヒ", "h", 0.0, "i", 0.0, 500.0),
+        _gen_mora("ホ", "h", 0.0, "O", 0.0, 0.0),
+    ]
+
+    # Expects - mean=300 var x0.5 intonation scaling
+    true_moras = [
+        _gen_mora("コ", "k", 0.0, "o", 0.0, 250.0),
+        _gen_mora("ン", None, None, "N", 0.0, 250.0),
+        _gen_mora("、", None, None, "pau", 0.0, 0.0),
+        _gen_mora("ヒ", "h", 0.0, "i", 0.0, 400.0),
+        _gen_mora("ホ", "h", 0.0, "O", 0.0, 0.0),
+    ]
+
+    # Outputs
+    moras = apply_intonation_scale(input_moras, query)
+
+    assert moras == true_moras
+
+
+def test_apply_volume_scale():
+    """Test `apply_volume_scale`."""
+    # Inputs
+    query = _gen_query(volumeScale=3.0)
+    input_wave = numpy.array([0.0, 1.0, 2.0])
+
+    # Expects - x3 scale
+    true_wave = numpy.array([0.0, 3.0, 6.0])
+
+    # Outputs
+    wave = apply_volume_scale(input_wave, query)
+
+    assert numpy.allclose(wave, true_wave)
+
+
+def test_apply_output_sampling_rate():
+    """Test `apply_output_sampling_rate`."""
+    # Inputs
+    query = _gen_query(outputSamplingRate=12000)
+    input_wave = numpy.array([1.0 for _ in range(120)])
+    input_sr_wave = 24000
+
+    # Expects - half sampling rate
+    true_wave = numpy.array([1.0 for _ in range(60)])
+    assert true_wave.shape == (60,), "Prerequisites"
+
+    # Outputs
+    wave = apply_output_sampling_rate(input_wave, input_sr_wave, query)
+
+    assert wave.shape[0] == true_wave.shape[0]
+
+
+def test_apply_output_stereo():
+    """Test `apply_output_stereo`."""
+    # Inputs
+    query = _gen_query(outputStereo=True)
+    input_wave = numpy.array([1.0, 0.0, 2.0])
+
+    # Expects - Stereo :: (Time, Channel)
+    true_wave = numpy.array([[1.0, 1.0], [0.0, 0.0], [2.0, 2.0]])
+
+    # Outputs
+    wave = apply_output_stereo(input_wave, query)
+
+    assert numpy.array_equal(wave, true_wave)
+
+
+def test_count_frame_per_unit():
+    """Test `count_frame_per_unit`."""
+    # Inputs
+    moras = [
+        _gen_mora("　", None, None, "　", 2 * 0.01067, 0.0),  # 0.01067 [sec/frame]
+        _gen_mora("コ", "k", 2 * 0.01067, "o", 4 * 0.01067, 0.0),
+        _gen_mora("ン", None, None, "N", 4 * 0.01067, 0.0),
+        _gen_mora("、", None, None, "pau", 2 * 0.01067, 0.0),
+        _gen_mora("ヒ", "h", 2 * 0.01067, "i", 4 * 0.01067, 0.0),
+        _gen_mora("ホ", "h", 4 * 0.01067, "O", 2 * 0.01067, 0.0),
+        _gen_mora("　", None, None, "　", 6 * 0.01067, 0.0),
+    ]
+
+    # Expects
+    #                             Pre k  o  N pau h  i  h  O Pst
+    true_frame_per_phoneme_list = [2, 2, 4, 4, 2, 2, 4, 4, 2, 6]
+    true_frame_per_phoneme = numpy.array(true_frame_per_phoneme_list, dtype=numpy.int32)
+    #                         Pre ko  N pau hi hO Pst
+    true_frame_per_mora_list = [2, 6, 4, 2, 6, 6, 6]
+    true_frame_per_mora = numpy.array(true_frame_per_mora_list, dtype=numpy.int32)
+
+    # Outputs
+    frame_per_phoneme, frame_per_mora = count_frame_per_unit(moras)
+
+    assert numpy.array_equal(frame_per_phoneme, true_frame_per_phoneme)
+    assert numpy.array_equal(frame_per_mora, true_frame_per_mora)
+
+
+def test_query_to_decoder_feature():
+    """Test `query_to_decoder_feature`."""
+    # Inputs
+    accent_phrases = [
+        AccentPhrase(
+            moras=[
+                _gen_mora("コ", "k", 2 * 0.01067, "o", 4 * 0.01067, 50.0),
+                _gen_mora("ン", None, None, "N", 4 * 0.01067, 50.0),
+            ],
+            accent=1,
+            pause_mora=_gen_mora("、", None, None, "pau", 2 * 0.01067, 0.0),
+        ),
+        AccentPhrase(
+            moras=[
+                _gen_mora("ヒ", "h", 2 * 0.01067, "i", 4 * 0.01067, 125.0),
+                _gen_mora("ホ", "h", 4 * 0.01067, "O", 2 * 0.01067, 0.0),
+            ],
+            accent=1,
+            pause_mora=None,
+        ),
+    ]
+    query = _gen_query(
+        accent_phrases=accent_phrases,
+        speedScale=2.0,
+        pitchScale=2.0,
+        intonationScale=0.5,
+        prePhonemeLength=2 * 0.01067,
+        postPhonemeLength=6 * 0.01067,
+    )
+
+    # Expects
+    # frame_per_phoneme
+    #                        Pre k  o  N pau h  i  h  O Pst
+    true_frame_per_phoneme = [1, 1, 2, 2, 1, 1, 2, 2, 1, 3]
+    n_frame = sum(true_frame_per_phoneme)
+    # phoneme
+    #                     Pr  k   o   o  N  N pau  h   i   i   h   h  O Pt Pt Pt
+    frame_phoneme_idxs = [0, 23, 30, 30, 4, 4, 0, 19, 21, 21, 19, 19, 5, 0, 0, 0]
+    true_phoneme = numpy.zeros([n_frame, TRUE_NUM_PHONEME], dtype=numpy.float32)
+    for frame_idx, phoneme_idx in enumerate(frame_phoneme_idxs):
+        true_phoneme[frame_idx, phoneme_idx] = 1.0
+    # Pitch
+    #                   paw ko  N pau hi hO paw
+    # frame_per_vowel = [1, 3,  2, 1, 3, 3, 3]
+    #           pau   ko     ko     ko      N      N
+    true1_f0 = [0.0, 250.0, 250.0, 250.0, 250.0, 250.0]
+    #           pau   hi     hi     hi
+    true2_f0 = [0.0, 400.0, 400.0, 400.0]
+    #           hO   hO   hO   paw  paw  paw
+    true3_f0 = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
+    true_f0 = numpy.array(true1_f0 + true2_f0 + true3_f0, dtype=numpy.float32)
+
+    # Outputs
+    phoneme, f0 = query_to_decoder_feature(query)
+
+    assert numpy.array_equal(phoneme, true_phoneme)
+    assert numpy.array_equal(f0, true_f0)
+
+
+def test_raw_wave_to_output_wave_with_resample():
+    """Test `raw_wave_to_output_wave` with resampling option."""
+    # Inputs
+    query = _gen_query(volumeScale=2, outputSamplingRate=48000, outputStereo=True)
+    raw_wave = numpy.random.rand(240)
+    sr_raw_wave = 24000
+
+    # Expects
+    true_wave_shape = (480, 2)
+
+    # Outputs
+    wave = raw_wave_to_output_wave(query, raw_wave, sr_raw_wave)
+
+    assert wave.shape == true_wave_shape
+
+
+def test_raw_wave_to_output_wave_without_resample():
+    """Test `raw_wave_to_output_wave`  without resampling option."""
+    # Inputs
+    query = _gen_query(volumeScale=2, outputStereo=True)
+    raw_wave = numpy.random.rand(240)
+    sr_raw_wave = 24000
+
+    # Expects
+    true_wave = numpy.array([2 * raw_wave, 2 * raw_wave]).T
+
+    # Outputs
+    wave = raw_wave_to_output_wave(query, raw_wave, sr_raw_wave)
+
+    assert numpy.allclose(wave, true_wave)

From 6417a7ffb0f454cc4a31433d9e1fdac05ff8d670 Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Wed, 10 Jan 2024 01:20:35 +0900
Subject: [PATCH 134/177] =?UTF-8?q?numpy.ndarray=E3=81=AE=E3=81=BB?=
 =?UTF-8?q?=E3=81=BC=E5=85=A8=E3=81=A6=E3=81=AB=E5=9E=8B=E3=82=92=E4=BB=98?=
 =?UTF-8?q?=E3=81=91=E3=82=8B=20(#989)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* ndarray消去

* いろいろ整理

* とりあえずテストは通るようになった
---
 test/e2e/test_audio_query.py               |   6 +-
 test/test_connect_base64_waves.py          |   5 +-
 test/tts_pipeline/test_tts_engine.py       | 169 +++++++++++----------
 test/tts_pipeline/test_wave_synthesizer.py |  42 ++---
 test/utility.py                            |  21 +++
 voicevox_engine/dev/core/mock.py           |  37 +++--
 voicevox_engine/model.py                   |   3 +
 voicevox_engine/morphing.py                |  23 +--
 voicevox_engine/tts_pipeline/tts_engine.py |  29 ++--
 9 files changed, 186 insertions(+), 149 deletions(-)
 create mode 100644 test/utility.py

diff --git a/test/e2e/test_audio_query.py b/test/e2e/test_audio_query.py
index 0be032894..bcab2fea1 100644
--- a/test/e2e/test_audio_query.py
+++ b/test/e2e/test_audio_query.py
@@ -2,6 +2,8 @@
 AudioQuery APIのテスト
 """
 
+from test.utility import round_floats
+
 from fastapi.testclient import TestClient
 from syrupy.extensions.json import JSONSnapshotExtension
 
@@ -11,7 +13,7 @@ def test_style_idを指定して音声合成クエリが取得できる(
 ) -> None:
     response = client.post("/audio_query", params={"text": "テストです", "style_id": 0})
     assert response.status_code == 200
-    assert snapshot_json == response.json()
+    assert snapshot_json == round_floats(response.json(), round_value=2)
 
 
 def test_speakerを指定しても音声合成クエリが取得できる(
@@ -19,7 +21,7 @@ def test_speakerを指定しても音声合成クエリが取得できる(
 ) -> None:
     response = client.post("/audio_query", params={"text": "テストです", "speaker": 0})
     assert response.status_code == 200
-    assert snapshot_json == response.json()
+    assert snapshot_json == round_floats(response.json(), round_value=2)
 
 
 def test_style_idとspeakerを両方指定するとエラー(client: TestClient) -> None:
diff --git a/test/test_connect_base64_waves.py b/test/test_connect_base64_waves.py
index ac9dfb841..88739d227 100644
--- a/test/test_connect_base64_waves.py
+++ b/test/test_connect_base64_waves.py
@@ -5,6 +5,7 @@
 import numpy as np
 import numpy.testing
 import soundfile
+from numpy.typing import NDArray
 from soxr import resample
 
 from voicevox_engine.utility import ConnectBase64WavesException, connect_base64_waves
@@ -12,14 +13,14 @@
 
 def generate_sine_wave_ndarray(
     seconds: float, samplerate: int, frequency: float
-) -> np.ndarray:
+) -> NDArray[np.float32]:
     x = np.linspace(0, seconds, int(seconds * samplerate), endpoint=False)
     wave = np.sin(2 * np.pi * frequency * x).astype(np.float32)
 
     return wave
 
 
-def encode_bytes(wave_ndarray: np.ndarray, samplerate: int) -> bytes:
+def encode_bytes(wave_ndarray: NDArray[np.float32], samplerate: int) -> bytes:
     wave_bio = io.BytesIO()
     soundfile.write(
         file=wave_bio,
diff --git a/test/tts_pipeline/test_tts_engine.py b/test/tts_pipeline/test_tts_engine.py
index 8a52ccc45..f13807f58 100644
--- a/test/tts_pipeline/test_tts_engine.py
+++ b/test/tts_pipeline/test_tts_engine.py
@@ -1,11 +1,10 @@
-import json
-from typing import Union
+from test.utility import pydantic_to_native_type, round_floats
 from unittest import TestCase
 from unittest.mock import Mock
 
-import numpy
+import numpy as np
 import pytest
-from pydantic.json import pydantic_encoder
+from numpy.typing import NDArray
 from syrupy.extensions.json import JSONSnapshotExtension
 
 from voicevox_engine.dev.core.mock import MockCoreWrapper
@@ -35,25 +34,25 @@ def is_same_phoneme(p1: Phoneme, p2: Phoneme) -> bool:
 
 
 def yukarin_s_mock(
-    length: int, phoneme_list: numpy.ndarray, style_id: numpy.ndarray
-) -> numpy.ndarray:
+    length: int, phoneme_list: NDArray[np.int64], style_id: NDArray[np.int64]
+) -> NDArray[np.float32]:
     result = []
     # mockとしての適当な処理、特に意味はない
     for i in range(length):
         result.append(round((phoneme_list[i] * 0.0625 + style_id).item(), 2))
-    return numpy.array(result)
+    return np.array(result, dtype=np.float32)
 
 
 def yukarin_sa_mock(
     length: int,
-    vowel_phoneme_list: numpy.ndarray,
-    consonant_phoneme_list: numpy.ndarray,
-    start_accent_list: numpy.ndarray,
-    end_accent_list: numpy.ndarray,
-    start_accent_phrase_list: numpy.ndarray,
-    end_accent_phrase_list: numpy.ndarray,
-    style_id: numpy.ndarray,
-) -> numpy.ndarray:
+    vowel_phoneme_list: NDArray[np.int64],
+    consonant_phoneme_list: NDArray[np.int64],
+    start_accent_list: NDArray[np.int64],
+    end_accent_list: NDArray[np.int64],
+    start_accent_phrase_list: NDArray[np.int64],
+    end_accent_phrase_list: NDArray[np.int64],
+    style_id: NDArray[np.int64],
+) -> NDArray[np.float32]:
     result = []
     # mockとしての適当な処理、特に意味はない
     for i in range(length):
@@ -74,23 +73,23 @@ def yukarin_sa_mock(
                 2,
             )
         )
-    return numpy.array(result)[numpy.newaxis]
+    return np.array(result, dtype=np.float32)[np.newaxis]
 
 
 def decode_mock(
     length: int,
     phoneme_size: int,
-    f0: numpy.ndarray,
-    phoneme: numpy.ndarray,
-    style_id: Union[numpy.ndarray, int],
-) -> numpy.ndarray:
+    f0: NDArray[np.float32],
+    phoneme: NDArray[np.float32],
+    style_id: NDArray[np.int64],
+) -> NDArray[np.float32]:
     result = []
     # mockとしての適当な処理、特に意味はない
     for i in range(length):
         result += [
-            (f0[i, 0] * (numpy.where(phoneme[i] == 1)[0] / phoneme_size) + style_id)
+            (f0[i, 0] * (np.where(phoneme[i] == 1)[0] / phoneme_size) + style_id)
         ] * 256
-    return numpy.array(result)
+    return np.array(result, dtype=np.float32)
 
 
 class MockCore:
@@ -247,7 +246,7 @@ def test_update_length(self):
         index = 1
 
         def result_value(i: int) -> float:
-            return round(float(phoneme_list[i] * 0.0625 + 1), 2)
+            return np.float32(round(float(phoneme_list[i] * 0.0625 + 1), 2)).item()
 
         for accent_phrase in true_result:
             moras = accent_phrase.moras
@@ -264,9 +263,9 @@ def result_value(i: int) -> float:
         self.assertEqual(list_length, true_list_length)
         self.assertEqual(list_length, len(phoneme_list))
         self.assertEqual(style_id, true_style_id)
-        numpy.testing.assert_array_equal(
+        np.testing.assert_array_equal(
             phoneme_list,
-            numpy.array(true_phoneme_list, dtype=numpy.int64),
+            np.array(true_phoneme_list, dtype=np.int64),
         )
         self.assertEqual(result, true_result)
 
@@ -295,12 +294,12 @@ def test_update_pitch(self):
         end_accent_phrase_list = yukarin_sa_args["end_accent_phrase_list"][0]
         style_id = yukarin_sa_args["style_id"]
         # Expects
-        true_vowels = numpy.array([0, 30, 4, 21, 21, 7, 0, 21, 30, 14, 6, 0])
-        true_consonants = numpy.array([-1, 23, -1, 28, 10, 42, -1, 19, 19, 12, 35, -1])
-        true_accent_starts = numpy.array([0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0])
-        true_accent_ends = numpy.array([0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0])
-        true_phrase_starts = numpy.array([0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0])
-        true_phrase_ends = numpy.array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0])
+        true_vowels = np.array([0, 30, 4, 21, 21, 7, 0, 21, 30, 14, 6, 0])
+        true_consonants = np.array([-1, 23, -1, 28, 10, 42, -1, 19, 19, 12, 35, -1])
+        true_accent_starts = np.array([0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0])
+        true_accent_ends = np.array([0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0])
+        true_phrase_starts = np.array([0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0])
+        true_phrase_ends = np.array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0])
         true_result = _gen_hello_hiho_accent_phrases()
         index = 1
 
@@ -311,21 +310,23 @@ def result_value(i: int) -> float:
             ]
             if vowel_phoneme_list[i] in unvoiced_mora_tail_ids:
                 return 0
-            return round(
-                (
+            return np.float32(
+                round(
                     (
-                        vowel_phoneme_list[i]
-                        + consonant_phoneme_list[i]
-                        + start_accent_list[i]
-                        + end_accent_list[i]
-                        + start_accent_phrase_list[i]
-                        + end_accent_phrase_list[i]
-                    )
-                    * 0.0625
-                    + 1
-                ),
-                2,
-            )
+                        (
+                            vowel_phoneme_list[i]
+                            + consonant_phoneme_list[i]
+                            + start_accent_list[i]
+                            + end_accent_list[i]
+                            + start_accent_phrase_list[i]
+                            + end_accent_phrase_list[i]
+                        )
+                        * 0.0625
+                        + 1
+                    ),
+                    2,
+                )
+            ).item()
 
         for accent_phrase in true_result:
             moras = accent_phrase.moras
@@ -344,12 +345,12 @@ def result_value(i: int) -> float:
         self.assertEqual(list_length, len(start_accent_phrase_list))
         self.assertEqual(list_length, len(end_accent_phrase_list))
         self.assertEqual(style_id, 1)
-        numpy.testing.assert_array_equal(vowel_phoneme_list, true_vowels)
-        numpy.testing.assert_array_equal(consonant_phoneme_list, true_consonants)
-        numpy.testing.assert_array_equal(start_accent_list, true_accent_starts)
-        numpy.testing.assert_array_equal(end_accent_list, true_accent_ends)
-        numpy.testing.assert_array_equal(start_accent_phrase_list, true_phrase_starts)
-        numpy.testing.assert_array_equal(end_accent_phrase_list, true_phrase_ends)
+        np.testing.assert_array_equal(vowel_phoneme_list, true_vowels)
+        np.testing.assert_array_equal(consonant_phoneme_list, true_consonants)
+        np.testing.assert_array_equal(start_accent_list, true_accent_starts)
+        np.testing.assert_array_equal(end_accent_list, true_accent_ends)
+        np.testing.assert_array_equal(start_accent_phrase_list, true_phrase_starts)
+        np.testing.assert_array_equal(end_accent_phrase_list, true_phrase_ends)
         self.assertEqual(result, true_result)
 
 
@@ -373,7 +374,7 @@ def test_mocked_update_length_output(snapshot_json: JSONSnapshotExtension) -> No
     # Outputs
     result = tts_engine.update_length(hello_hiho, StyleId(1))
     # Tests
-    assert snapshot_json == json.loads(json.dumps(result, default=pydantic_encoder))
+    assert snapshot_json == round_floats(pydantic_to_native_type(result), round_value=2)
 
 
 def koreha_arimasuka_base_expected():
@@ -383,26 +384,26 @@ def koreha_arimasuka_base_expected():
                 Mora(
                     text="コ",
                     consonant="k",
-                    consonant_length=2.44,
+                    consonant_length=np.float32(2.44),
                     vowel="o",
-                    vowel_length=2.88,
-                    pitch=4.38,
+                    vowel_length=np.float32(2.88),
+                    pitch=np.float32(4.38),
                 ),
                 Mora(
                     text="レ",
                     consonant="r",
-                    consonant_length=3.06,
+                    consonant_length=np.float32(3.06),
                     vowel="e",
-                    vowel_length=1.88,
-                    pitch=4.0,
+                    vowel_length=np.float32(1.88),
+                    pitch=np.float32(4.0),
                 ),
                 Mora(
                     text="ワ",
                     consonant="w",
-                    consonant_length=3.62,
+                    consonant_length=np.float32(3.62),
                     vowel="a",
-                    vowel_length=1.44,
-                    pitch=4.19,
+                    vowel_length=np.float32(1.44),
+                    pitch=np.float32(4.19),
                 ),
             ],
             accent=3,
@@ -416,40 +417,40 @@ def koreha_arimasuka_base_expected():
                     consonant=None,
                     consonant_length=None,
                     vowel="a",
-                    vowel_length=1.44,
-                    pitch=1.44,
+                    vowel_length=np.float32(1.44),
+                    pitch=np.float32(1.44),
                 ),
                 Mora(
                     text="リ",
                     consonant="r",
-                    consonant_length=3.06,
+                    consonant_length=np.float32(3.06),
                     vowel="i",
-                    vowel_length=2.31,
-                    pitch=4.44,
+                    vowel_length=np.float32(2.31),
+                    pitch=np.float32(4.44),
                 ),
                 Mora(
                     text="マ",
                     consonant="m",
-                    consonant_length=2.62,
+                    consonant_length=np.float32(2.62),
                     vowel="a",
-                    vowel_length=1.44,
-                    pitch=3.12,
+                    vowel_length=np.float32(1.44),
+                    pitch=np.float32(3.12),
                 ),
                 Mora(
                     text="ス",
                     consonant="s",
-                    consonant_length=3.19,
+                    consonant_length=np.float32(3.19),
                     vowel="U",
-                    vowel_length=1.38,
-                    pitch=0.0,
+                    vowel_length=np.float32(1.38),
+                    pitch=np.float32(0.0),
                 ),
                 Mora(
                     text="カ",
                     consonant="k",
-                    consonant_length=2.44,
+                    consonant_length=np.float32(2.44),
                     vowel="a",
-                    vowel_length=1.44,
-                    pitch=2.94,
+                    vowel_length=np.float32(1.44),
+                    pitch=np.float32(2.94),
                 ),
             ],
             accent=3,
@@ -498,7 +499,7 @@ def test_upspeak_voiced_last_mora(self):
                 consonant_length=None,
                 vowel="a",
                 vowel_length=0.15,
-                pitch=expected[-1].moras[-1].pitch + 0.3,
+                pitch=np.float32(expected[-1].moras[-1].pitch) + 0.3,
             )
         ]
         self.create_synthesis_test_base(
@@ -534,8 +535,8 @@ def nn_base_expected():
                             consonant=None,
                             consonant_length=None,
                             vowel="N",
-                            vowel_length=1.25,
-                            pitch=1.44,
+                            vowel_length=np.float32(1.25),
+                            pitch=np.float32(1.44),
                         )
                     ],
                     accent=1,
@@ -562,7 +563,7 @@ def nn_base_expected():
                 consonant_length=None,
                 vowel="N",
                 vowel_length=0.15,
-                pitch=expected[-1].moras[-1].pitch + 0.3,
+                pitch=np.float32(expected[-1].moras[-1].pitch) + 0.3,
             )
         ]
         self.create_synthesis_test_base(
@@ -590,8 +591,8 @@ def ltu_base_expected():
                             consonant=None,
                             consonant_length=None,
                             vowel="cl",
-                            vowel_length=1.69,
-                            pitch=0.0,
+                            vowel_length=np.float32(1.69),
+                            pitch=np.float32(0.0),
                         )
                     ],
                     accent=1,
@@ -634,10 +635,10 @@ def su_base_expected():
                         Mora(
                             text="ス",
                             consonant="s",
-                            consonant_length=3.19,
+                            consonant_length=np.float32(3.19),
                             vowel="u",
-                            vowel_length=3.5,
-                            pitch=5.94,
+                            vowel_length=np.float32(3.5),
+                            pitch=np.float32(5.94),
                         )
                     ],
                     accent=1,
diff --git a/test/tts_pipeline/test_wave_synthesizer.py b/test/tts_pipeline/test_wave_synthesizer.py
index 2ece25088..084edcec6 100644
--- a/test/tts_pipeline/test_wave_synthesizer.py
+++ b/test/tts_pipeline/test_wave_synthesizer.py
@@ -1,6 +1,6 @@
 """波形合成のテスト"""
 
-import numpy
+import numpy as np
 
 from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
 from voicevox_engine.tts_pipeline.tts_engine import (
@@ -170,26 +170,26 @@ def test_apply_volume_scale():
     """Test `apply_volume_scale`."""
     # Inputs
     query = _gen_query(volumeScale=3.0)
-    input_wave = numpy.array([0.0, 1.0, 2.0])
+    input_wave = np.array([0.0, 1.0, 2.0])
 
     # Expects - x3 scale
-    true_wave = numpy.array([0.0, 3.0, 6.0])
+    true_wave = np.array([0.0, 3.0, 6.0])
 
     # Outputs
     wave = apply_volume_scale(input_wave, query)
 
-    assert numpy.allclose(wave, true_wave)
+    assert np.allclose(wave, true_wave)
 
 
 def test_apply_output_sampling_rate():
     """Test `apply_output_sampling_rate`."""
     # Inputs
     query = _gen_query(outputSamplingRate=12000)
-    input_wave = numpy.array([1.0 for _ in range(120)])
+    input_wave = np.array([1.0 for _ in range(120)])
     input_sr_wave = 24000
 
     # Expects - half sampling rate
-    true_wave = numpy.array([1.0 for _ in range(60)])
+    true_wave = np.array([1.0 for _ in range(60)])
     assert true_wave.shape == (60,), "Prerequisites"
 
     # Outputs
@@ -202,15 +202,15 @@ def test_apply_output_stereo():
     """Test `apply_output_stereo`."""
     # Inputs
     query = _gen_query(outputStereo=True)
-    input_wave = numpy.array([1.0, 0.0, 2.0])
+    input_wave = np.array([1.0, 0.0, 2.0])
 
     # Expects - Stereo :: (Time, Channel)
-    true_wave = numpy.array([[1.0, 1.0], [0.0, 0.0], [2.0, 2.0]])
+    true_wave = np.array([[1.0, 1.0], [0.0, 0.0], [2.0, 2.0]])
 
     # Outputs
     wave = apply_output_stereo(input_wave, query)
 
-    assert numpy.array_equal(wave, true_wave)
+    assert np.array_equal(wave, true_wave)
 
 
 def test_count_frame_per_unit():
@@ -229,16 +229,16 @@ def test_count_frame_per_unit():
     # Expects
     #                             Pre k  o  N pau h  i  h  O Pst
     true_frame_per_phoneme_list = [2, 2, 4, 4, 2, 2, 4, 4, 2, 6]
-    true_frame_per_phoneme = numpy.array(true_frame_per_phoneme_list, dtype=numpy.int32)
+    true_frame_per_phoneme = np.array(true_frame_per_phoneme_list, dtype=np.int32)
     #                         Pre ko  N pau hi hO Pst
     true_frame_per_mora_list = [2, 6, 4, 2, 6, 6, 6]
-    true_frame_per_mora = numpy.array(true_frame_per_mora_list, dtype=numpy.int32)
+    true_frame_per_mora = np.array(true_frame_per_mora_list, dtype=np.int32)
 
     # Outputs
     frame_per_phoneme, frame_per_mora = count_frame_per_unit(moras)
 
-    assert numpy.array_equal(frame_per_phoneme, true_frame_per_phoneme)
-    assert numpy.array_equal(frame_per_mora, true_frame_per_mora)
+    assert np.array_equal(frame_per_phoneme, true_frame_per_phoneme)
+    assert np.array_equal(frame_per_mora, true_frame_per_mora)
 
 
 def test_query_to_decoder_feature():
@@ -279,7 +279,7 @@ def test_query_to_decoder_feature():
     # phoneme
     #                     Pr  k   o   o  N  N pau  h   i   i   h   h  O Pt Pt Pt
     frame_phoneme_idxs = [0, 23, 30, 30, 4, 4, 0, 19, 21, 21, 19, 19, 5, 0, 0, 0]
-    true_phoneme = numpy.zeros([n_frame, TRUE_NUM_PHONEME], dtype=numpy.float32)
+    true_phoneme = np.zeros([n_frame, TRUE_NUM_PHONEME], dtype=np.float32)
     for frame_idx, phoneme_idx in enumerate(frame_phoneme_idxs):
         true_phoneme[frame_idx, phoneme_idx] = 1.0
     # Pitch
@@ -291,20 +291,20 @@ def test_query_to_decoder_feature():
     true2_f0 = [0.0, 400.0, 400.0, 400.0]
     #           hO   hO   hO   paw  paw  paw
     true3_f0 = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
-    true_f0 = numpy.array(true1_f0 + true2_f0 + true3_f0, dtype=numpy.float32)
+    true_f0 = np.array(true1_f0 + true2_f0 + true3_f0, dtype=np.float32)
 
     # Outputs
     phoneme, f0 = query_to_decoder_feature(query)
 
-    assert numpy.array_equal(phoneme, true_phoneme)
-    assert numpy.array_equal(f0, true_f0)
+    assert np.array_equal(phoneme, true_phoneme)
+    assert np.array_equal(f0, true_f0)
 
 
 def test_raw_wave_to_output_wave_with_resample():
     """Test `raw_wave_to_output_wave` with resampling option."""
     # Inputs
     query = _gen_query(volumeScale=2, outputSamplingRate=48000, outputStereo=True)
-    raw_wave = numpy.random.rand(240)
+    raw_wave = np.random.rand(240).astype(np.float32)
     sr_raw_wave = 24000
 
     # Expects
@@ -320,13 +320,13 @@ def test_raw_wave_to_output_wave_without_resample():
     """Test `raw_wave_to_output_wave`  without resampling option."""
     # Inputs
     query = _gen_query(volumeScale=2, outputStereo=True)
-    raw_wave = numpy.random.rand(240)
+    raw_wave = np.random.rand(240).astype(np.float32)
     sr_raw_wave = 24000
 
     # Expects
-    true_wave = numpy.array([2 * raw_wave, 2 * raw_wave]).T
+    true_wave = np.array([2 * raw_wave, 2 * raw_wave]).T
 
     # Outputs
     wave = raw_wave_to_output_wave(query, raw_wave, sr_raw_wave)
 
-    assert numpy.allclose(wave, true_wave)
+    assert np.allclose(wave, true_wave)
diff --git a/test/utility.py b/test/utility.py
new file mode 100644
index 000000000..8a8eafbb0
--- /dev/null
+++ b/test/utility.py
@@ -0,0 +1,21 @@
+import json
+from typing import Any
+
+from pydantic.json import pydantic_encoder
+
+
+def round_floats(value: Any, round_value: int) -> Any:
+    """Recursively round floats inside nested lists/dicts to `round_value` digits."""
+    if isinstance(value, float):
+        return round(value, round_value)
+    elif isinstance(value, list):
+        return [round_floats(v, round_value) for v in value]
+    elif isinstance(value, dict):  # only values are rounded; keys are kept as-is
+        return {k: round_floats(v, round_value) for k, v in value.items()}
+    else:  # any other type (str, int, None, tuple, ...) is returned unchanged
+        return value
+
+
+def pydantic_to_native_type(value: Any) -> Any:
+    """Convert pydantic objects to native Python types via a JSON round trip."""
+    return json.loads(json.dumps(value, default=pydantic_encoder))
diff --git a/voicevox_engine/dev/core/mock.py b/voicevox_engine/dev/core/mock.py
index 51b06db28..aa468234b 100644
--- a/voicevox_engine/dev/core/mock.py
+++ b/voicevox_engine/dev/core/mock.py
@@ -2,7 +2,6 @@
 from pathlib import Path
 
 import numpy as np
-from numpy import ndarray
 from numpy.typing import NDArray
 
 from ...core_wrapper import CoreWrapper
@@ -65,26 +64,26 @@ def metas(self) -> str:
         )
 
     def yukarin_s_forward(
-        self, length: int, phoneme_list: ndarray, style_id: ndarray
-    ) -> NDArray[np.floating]:
+        self, length: int, phoneme_list: NDArray[np.int64], style_id: NDArray[np.int64]
+    ) -> NDArray[np.float32]:
         """音素系列サイズ・音素ID系列・スタイルIDから音素長系列を生成する"""
         result = []
         # mockとしての適当な処理、特に意味はない
         for i in range(length):
             result.append(round((phoneme_list[i] * 0.0625 + style_id).item(), 2))
-        return np.array(result)
+        return np.array(result, dtype=np.float32)
 
     def yukarin_sa_forward(
         self,
         length: int,
-        vowel_phoneme_list: ndarray,
-        consonant_phoneme_list: ndarray,
-        start_accent_list: ndarray,
-        end_accent_list: ndarray,
-        start_accent_phrase_list: ndarray,
-        end_accent_phrase_list: ndarray,
-        style_id: ndarray,
-    ) -> NDArray[np.floating]:
+        vowel_phoneme_list: NDArray[np.int64],
+        consonant_phoneme_list: NDArray[np.int64],
+        start_accent_list: NDArray[np.int64],
+        end_accent_list: NDArray[np.int64],
+        start_accent_phrase_list: NDArray[np.int64],
+        end_accent_phrase_list: NDArray[np.int64],
+        style_id: NDArray[np.int64],
+    ) -> NDArray[np.float32]:
         """モーラ系列サイズ・母音系列・子音系列・アクセント位置・アクセント句区切り・スタイルIDからモーラ音高系列を生成する"""
         assert length > 1, "前後無音を必ず付与しなければならない"
 
@@ -108,24 +107,24 @@ def yukarin_sa_forward(
                     2,
                 )
             )
-        return np.array(result)[np.newaxis]
+        return np.array(result, dtype=np.float32)[np.newaxis]
 
     def decode_forward(
         self,
         length: int,
         phoneme_size: int,
-        f0: ndarray,
-        phoneme: ndarray,
-        style_id: ndarray,
-    ) -> NDArray[np.floating]:
+        f0: NDArray[np.float32],
+        phoneme: NDArray[np.float32],
+        style_id: NDArray[np.int64],
+    ) -> NDArray[np.float32]:
         """フレーム長・音素種類数・フレーム音高・フレーム音素onehot・スタイルIDからダミー音声波形を生成する"""
         # 入力値を反映し、長さが 256 倍であるダミー配列を出力する
-        result: list[ndarray] = []
+        result: list[NDArray[np.float32]] = []
         for i in range(length):
             result += [
                 (f0[i, 0] * (np.where(phoneme[i] == 1)[0] / phoneme_size) + style_id)
             ] * 256
-        return np.array(result)
+        return np.array(result, dtype=np.float32)
 
     def supported_devices(self):
         return json.dumps(
diff --git a/voicevox_engine/model.py b/voicevox_engine/model.py
index d8951b0be..cd92cab4f 100644
--- a/voicevox_engine/model.py
+++ b/voicevox_engine/model.py
@@ -26,6 +26,9 @@ def __hash__(self):
         ]
         return hash(tuple(sorted(items)))
 
+    class Config:
+        validate_assignment = True
+
 
 class AccentPhrase(BaseModel):
     """
diff --git a/voicevox_engine/morphing.py b/voicevox_engine/morphing.py
index 92b54c2d1..957d2d279 100644
--- a/voicevox_engine/morphing.py
+++ b/voicevox_engine/morphing.py
@@ -1,3 +1,8 @@
+"""
+WORLDを使ってモーフィングするためのモジュール。
+pyworldの入出力はnp.doubleやnp.float64なので注意。
+"""
+
 from copy import deepcopy
 from dataclasses import dataclass
 from itertools import chain
@@ -23,10 +28,10 @@
 class MorphingParameter:
     fs: int
     frame_period: float
-    base_f0: NDArray[np.double]
-    base_aperiodicity: NDArray[np.double]
-    base_spectrogram: NDArray[np.double]
-    target_spectrogram: NDArray[np.double]
+    base_f0: NDArray[np.float64]
+    base_aperiodicity: NDArray[np.float64]
+    base_spectrogram: NDArray[np.float64]
+    target_spectrogram: NDArray[np.float64]
 
 
 def create_morphing_parameter(
@@ -147,8 +152,8 @@ def synthesis_morphing_parameter(
     # WORLDに掛けるため合成はモノラルで行う
     query.outputStereo = False
 
-    base_wave = engine.synthesize_wave(query, base_style_id).astype("float")
-    target_wave = engine.synthesize_wave(query, target_style_id).astype("float")
+    base_wave = engine.synthesize_wave(query, base_style_id).astype(np.double)
+    target_wave = engine.synthesize_wave(query, target_style_id).astype(np.double)
 
     return create_morphing_parameter(
         base_wave=base_wave,
@@ -162,7 +167,7 @@ def synthesis_morphing(
     morph_rate: float,
     output_fs: int,
     output_stereo: bool = False,
-) -> NDArray[np.float64]:
+) -> NDArray[np.float32]:
     """
     指定した割合で、パラメータをもとにモーフィングした音声を生成します。
 
@@ -177,7 +182,7 @@ def synthesis_morphing(
 
     Returns
     -------
-    generated : NDArray[np.float64]
+    generated : NDArray[np.float32]
         モーフィングした音声
 
     Raises
@@ -200,7 +205,7 @@ def synthesis_morphing(
         morph_param.base_aperiodicity,
         morph_param.fs,
         morph_param.frame_period,
-    )
+    ).astype(np.float32)
 
     # TODO: tts_engine.py でのリサンプル処理と共通化する
     if output_fs != morph_param.fs:
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index 0fbdc054c..c71fbaab9 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -128,7 +128,7 @@ def apply_speed_scale(moras: list[Mora], query: AudioQuery) -> list[Mora]:
 
 def count_frame_per_unit(
     moras: list[Mora],
-) -> tuple[NDArray[np.integer], NDArray[np.integer]]:
+) -> tuple[NDArray[np.int64], NDArray[np.int64]]:
     """
     音素あたり・モーラあたりのフレーム長を算出する
     Parameters
@@ -137,9 +137,9 @@ def count_frame_per_unit(
         モーラ系列
     Returns
     -------
-    frame_per_phoneme : NDArray[np.integer]
+    frame_per_phoneme : NDArray[np.int64]
         音素あたりのフレーム長。端数丸め。shape = (Phoneme,)
-    frame_per_mora : NDArray[np.integer]
+    frame_per_mora : NDArray[np.int64]
         モーラあたりのフレーム長。端数丸め。shape = (Mora,)
     """
     frame_per_phoneme: list[int] = []
@@ -156,7 +156,10 @@ def count_frame_per_unit(
         frame_per_phoneme += [vowel_frames]
         frame_per_mora += [mora_frames]
 
-    return np.array(frame_per_phoneme), np.array(frame_per_mora)
+    return (
+        np.array(frame_per_phoneme, dtype=np.int64),
+        np.array(frame_per_mora, dtype=np.int64),
+    )
 
 
 def _to_frame(sec: float) -> int:
@@ -183,14 +186,16 @@ def apply_intonation_scale(moras: list[Mora], query: AudioQuery) -> list[Mora]:
     return moras
 
 
-def apply_volume_scale(wave: np.ndarray, query: AudioQuery) -> NDArray[np.floating]:
+def apply_volume_scale(
+    wave: NDArray[np.float32], query: AudioQuery
+) -> NDArray[np.float32]:
     """音声波形へ音声合成用のクエリがもつ音量スケール（`volumeScale`）を適用する"""
     return wave * query.volumeScale
 
 
 def apply_output_sampling_rate(
-    wave: NDArray[np.floating], sr_wave: float, query: AudioQuery
-) -> NDArray[np.floating]:
+    wave: NDArray[np.float32], sr_wave: float, query: AudioQuery
+) -> NDArray[np.float32]:
     """音声波形へ音声合成用のクエリがもつ出力サンプリングレート（`outputSamplingRate`）を適用する"""
     # サンプリングレート一致のときはスルー
     if sr_wave == query.outputSamplingRate:
@@ -200,8 +205,8 @@ def apply_output_sampling_rate(
 
 
 def apply_output_stereo(
-    wave: NDArray[np.floating], query: AudioQuery
-) -> NDArray[np.floating]:
+    wave: NDArray[np.float32], query: AudioQuery
+) -> NDArray[np.float32]:
     """音声波形へ音声合成用のクエリがもつステレオ出力設定（`outputStereo`）を適用する"""
     if query.outputStereo:
         wave = np.array([wave, wave]).T
@@ -233,8 +238,8 @@ def query_to_decoder_feature(
 
 
 def raw_wave_to_output_wave(
-    query: AudioQuery, wave: np.ndarray, sr_wave: int
-) -> NDArray[np.floating]:
+    query: AudioQuery, wave: NDArray[np.float32], sr_wave: int
+) -> NDArray[np.float32]:
     """生音声波形に音声合成用のクエリを適用して出力音声波形を生成する"""
     wave = apply_volume_scale(wave, query)
     wave = apply_output_sampling_rate(wave, sr_wave, query)
@@ -379,7 +384,7 @@ def synthesize_wave(
         query: AudioQuery,
         style_id: StyleId,
         enable_interrogative_upspeak: bool = True,
-    ) -> NDArray[np.floating]:
+    ) -> NDArray[np.float32]:
         """音声合成用のクエリ・スタイルID・疑問文語尾自動調整フラグに基づいて音声波形を生成する"""
         # モーフィング時などに同一参照のqueryで複数回呼ばれる可能性があるので、元の引数のqueryに破壊的変更を行わない
         query = copy.deepcopy(query)

From b54ad9513f9a1b940c19719c3fc2629a4325f83a Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 10 Jan 2024 01:33:10 +0900
Subject: [PATCH 135/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E7=84=A1?=
 =?UTF-8?q?=E9=9F=B3=E4=BB=98=E5=8A=A0=E3=82=92=20`CoreAdapter`=20?=
 =?UTF-8?q?=E3=81=B8=E7=A7=BB=E6=A4=8D=20(#999)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

refactor: 無音付加を `CoreAdapter` へ移植
---
 voicevox_engine/core_adapter.py            | 15 ++++++++++++++-
 voicevox_engine/tts_pipeline/tts_engine.py | 13 +++----------
 2 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/voicevox_engine/core_adapter.py b/voicevox_engine/core_adapter.py
index 9197e3876..56d9fab2c 100644
--- a/voicevox_engine/core_adapter.py
+++ b/voicevox_engine/core_adapter.py
@@ -89,8 +89,17 @@ def safe_yukarin_sa_forward(
         end_accent_phrase_list: NDArray[np.int64],
         style_id: StyleId,
     ) -> NDArray[np.float32]:
-        # 「指定スタイルを初期化」「mutexによる安全性」「系列長・データ型に関するアダプター」を提供する
+        # 「指定スタイルを初期化」「mutexによる安全性」「コア仕様に従う無音自動付加」「系列長・データ型に関するアダプター」を提供する
         self.initialize_style_id_synthesis(style_id, skip_reinit=True)
+
+        # 前後無音を付加する（詳細: voicevox_engine#924）
+        vowel_phoneme_list = np.r_[0, vowel_phoneme_list, 0]
+        consonant_phoneme_list = np.r_[-1, consonant_phoneme_list, -1]
+        start_accent_list = np.r_[0, start_accent_list, 0]
+        end_accent_list = np.r_[0, end_accent_list, 0]
+        start_accent_phrase_list = np.r_[0, start_accent_phrase_list, 0]
+        end_accent_phrase_list = np.r_[0, end_accent_phrase_list, 0]
+
         with self.mutex:
             f0_list = self.core.yukarin_sa_forward(
                 length=vowel_phoneme_list.shape[0],
@@ -102,6 +111,10 @@ def safe_yukarin_sa_forward(
                 end_accent_phrase_list=end_accent_phrase_list[np.newaxis],
                 style_id=np.array(style_id, dtype=np.int64).reshape(-1),
             )[0]
+
+        # 前後無音に相当する領域を破棄する
+        f0_list = f0_list[1:-1]
+
         return f0_list
 
     def safe_decode_forward(
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index c71fbaab9..ae2b0bdcd 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -317,18 +317,11 @@ def update_pitch(
             [_create_one_hot(accent_phrase, -1) for accent_phrase in accent_phrases]
         )
 
-        # 前後無音を付加する
-        start_accent_list = np.r_[0, start_accent_list, 0]
-        end_accent_list = np.r_[0, end_accent_list, 0]
-        start_accent_phrase_list = np.r_[0, start_accent_phrase_list, 0]
-        end_accent_phrase_list = np.r_[0, end_accent_phrase_list, 0]
-
-        # アクセント句系列から（前後の無音含まない）モーラ系列と（前後の無音含む）音素系列を抽出する
+        # アクセント句系列からモーラ系列と音素系列を抽出する
         moras = to_flatten_moras(accent_phrases)
         phonemes = to_flatten_phonemes(moras)
-        phonemes = [Phoneme("pau")] + phonemes + [Phoneme("pau")]
 
-        # 前後無音付加済みの音素系列から子音ID系列・母音ID系列を抽出する
+        # 音素系列から子音ID系列・母音ID系列を抽出する
         consonants, vowels = split_mora(phonemes)
         vowel_ids = np.array([p.phoneme_id for p in vowels], dtype=np.int64)
         consonant_ids = np.array(
@@ -353,7 +346,7 @@ def update_pitch(
 
         # 更新する
         for i, mora in enumerate(moras):
-            mora.pitch = f0[i + 1]
+            mora.pitch = f0[i]
 
         return accent_phrases
 

From 46ea0256f6823969463ebf458d5c24a8fc4c7fb3 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 10 Jan 2024 02:09:36 +0900
Subject: [PATCH 136/177] =?UTF-8?q?=E5=BB=83=E6=AD=A2:=20`split=5Fmora()`?=
 =?UTF-8?q?=20(#1000)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

remove: `split_mora()`
---
 test/tts_pipeline/test_tts_engine.py       | 45 ----------------------
 voicevox_engine/tts_pipeline/tts_engine.py | 28 ++++----------
 2 files changed, 7 insertions(+), 66 deletions(-)

diff --git a/test/tts_pipeline/test_tts_engine.py b/test/tts_pipeline/test_tts_engine.py
index f13807f58..e7417792a 100644
--- a/test/tts_pipeline/test_tts_engine.py
+++ b/test/tts_pipeline/test_tts_engine.py
@@ -18,20 +18,12 @@
 from voicevox_engine.tts_pipeline.tts_engine import (
     TTSEngine,
     apply_interrogative_upspeak,
-    split_mora,
     to_flatten_moras,
     to_flatten_phonemes,
 )
 
 from .test_text_analyzer import stub_unknown_features_koxx
 
-TRUE_NUM_PHONEME = 45
-
-
-def is_same_phoneme(p1: Phoneme, p2: Phoneme) -> bool:
-    """2つのPhonemeが同じ `.phoneme` を持つ"""
-    return p1.phoneme == p2.phoneme
-
 
 def yukarin_s_mock(
     length: int, phoneme_list: NDArray[np.int64], style_id: NDArray[np.int64]
@@ -171,43 +163,6 @@ def _gen_hello_hiho_accent_phrases() -> list[AccentPhrase]:
     ]
 
 
-def is_same_phonemes(
-    p1s: list[Phoneme] | list[Phoneme | None], p2s: list[Phoneme] | list[Phoneme | None]
-) -> bool:
-    """2つのPhonemeリストで全要素ペアが同じ `.phoneme` を持つ"""
-    if len(p1s) != len(p2s):
-        return False
-
-    for p1, p2 in zip(p1s, p2s):
-        if p1 is None and p2 is None:  # None vs None -> equal
-            pass
-        elif p1 is None:  # None vs OjtOhoneme -> not equal
-            return False
-        elif p2 is None:  # OjtOhoneme vs None -> not equal
-            return False
-        elif is_same_phoneme(p1, p2):
-            pass
-        else:
-            return False
-    return True
-
-
-def test_split_mora():
-    # Inputs
-    hello_hiho = "sil k o N n i ch i w a pau h i h o d e s U sil"
-    hello_hiho_phonemes = [Phoneme(p) for p in hello_hiho.split()]
-    # Outputs
-    consonants, vowels = split_mora(hello_hiho_phonemes)
-    # Expects
-    cs = [None, "k", None, "n", "ch", "w", None, "h", "h", "d", "s", None]
-    vs = ["pau", "o", "N", "i", "i", "a", "pau", "i", "o", "e", "U", "pau"]
-    true_consonants = [Phoneme(p) if p else None for p in cs]
-    true_vowels = [Phoneme(p) for p in vs]
-    # Tests
-    assert is_same_phonemes(vowels, true_vowels)
-    assert is_same_phonemes(consonants, true_consonants)
-
-
 class TestTTSEngine(TestCase):
     def setUp(self):
         super().setUp()
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index ae2b0bdcd..a9d3ea7a4 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -51,21 +51,6 @@ def to_flatten_phonemes(moras: list[Mora]) -> list[Phoneme]:
     return phonemes
 
 
-def split_mora(phonemes: list[Phoneme]) -> tuple[list[Phoneme | None], list[Phoneme]]:
-    """音素系列から子音系列と母音系列を抽出する"""
-    consonants: list[Phoneme | None] = []
-    vowels: list[Phoneme] = []
-    for i, p in enumerate(phonemes):
-        if p.is_mora_tail():
-            vowels += [p]
-            # Vowel のみのモーラの場合（Vowel が連続する場合）、Consonant を None とする
-            if i == 0 or phonemes[i - 1].is_mora_tail():
-                consonants += [None]
-        else:
-            consonants += [p]
-    return consonants, vowels
-
-
 def _create_one_hot(accent_phrase: AccentPhrase, index: int) -> NDArray[np.int64]:
     """
     アクセント句から指定インデックスのみが 1 の配列 (onehot) を生成する。
@@ -319,14 +304,15 @@ def update_pitch(
 
         # アクセント句系列からモーラ系列と音素系列を抽出する
         moras = to_flatten_moras(accent_phrases)
-        phonemes = to_flatten_phonemes(moras)
 
-        # 音素系列から子音ID系列・母音ID系列を抽出する
-        consonants, vowels = split_mora(phonemes)
+        # モーラ系列から子音ID系列・母音ID系列を抽出する
+        consonant_id_ints = [
+            Phoneme(mora.consonant).phoneme_id if mora.consonant else -1
+            for mora in moras
+        ]
+        consonant_ids = np.array(consonant_id_ints, dtype=np.int64)
+        vowels = [Phoneme(mora.vowel) for mora in moras]
         vowel_ids = np.array([p.phoneme_id for p in vowels], dtype=np.int64)
-        consonant_ids = np.array(
-            [p.phoneme_id if p else -1 for p in consonants], dtype=np.int64
-        )
 
         # コアを用いてモーラ音高を生成する
         f0 = self._core.safe_yukarin_sa_forward(

From 9bba5976812e0f00e9637217bfac10961b06f5d8 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 10 Jan 2024 06:09:56 +0900
Subject: [PATCH 137/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20length=20?=
 =?UTF-8?q?=E7=84=A1=E9=9F=B3=E4=BB=98=E5=8A=A0=E3=81=AE`CoreAdapter`=20?=
 =?UTF-8?q?=E7=A7=BB=E6=A4=8D=20(#1001)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: length 無音付加の`CoreAdapter` 移植

* 無音自動付加→無音付加

---------

Co-authored-by: Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp>
---
 voicevox_engine/core_adapter.py            | 12 ++++++++++--
 voicevox_engine/tts_pipeline/tts_engine.py |  7 +++----
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/voicevox_engine/core_adapter.py b/voicevox_engine/core_adapter.py
index 56d9fab2c..2fe77349a 100644
--- a/voicevox_engine/core_adapter.py
+++ b/voicevox_engine/core_adapter.py
@@ -69,14 +69,22 @@ def is_initialized_style_id_synthesis(self, style_id: StyleId) -> bool:
     def safe_yukarin_s_forward(
         self, phoneme_list_s: NDArray[np.int64], style_id: StyleId
     ) -> NDArray[np.float32]:
-        # 「指定スタイルを初期化」「mutexによる安全性」「系列長・データ型に関するアダプター」を提供する
+        # 「指定スタイルを初期化」「mutexによる安全性」「コア仕様に従う無音付加」「系列長・データ型に関するアダプター」を提供する
         self.initialize_style_id_synthesis(style_id, skip_reinit=True)
+
+        # 前後無音を付加する（詳細: voicevox_engine#924）
+        phoneme_list_s = np.r_[0, phoneme_list_s, 0]
+
         with self.mutex:
             phoneme_length = self.core.yukarin_s_forward(
                 length=len(phoneme_list_s),
                 phoneme_list=phoneme_list_s,
                 style_id=np.array(style_id, dtype=np.int64).reshape(-1),
             )
+
+        # 前後無音に相当する領域を破棄する
+        phoneme_length = phoneme_length[1:-1]
+
         return phoneme_length
 
     def safe_yukarin_sa_forward(
@@ -89,7 +97,7 @@ def safe_yukarin_sa_forward(
         end_accent_phrase_list: NDArray[np.int64],
         style_id: StyleId,
     ) -> NDArray[np.float32]:
-        # 「指定スタイルを初期化」「mutexによる安全性」「コア仕様に従う無音自動付加」「系列長・データ型に関するアダプター」を提供する
+        # 「指定スタイルを初期化」「mutexによる安全性」「コア仕様に従う無音付加」「系列長・データ型に関するアダプター」を提供する
         self.initialize_style_id_synthesis(style_id, skip_reinit=True)
 
         # 前後無音を付加する（詳細: voicevox_engine#924）
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index a9d3ea7a4..b113f8444 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -247,9 +247,8 @@ def update_length(
         # モーラ系列を抽出する
         moras = to_flatten_moras(accent_phrases)
 
-        # 音素系列を抽出し前後無音を付加する
+        # 音素系列を抽出する
         phonemes = to_flatten_phonemes(moras)
-        phonemes = [Phoneme("pau")] + phonemes + [Phoneme("pau")]
 
         # 音素クラスから音素IDスカラへ表現を変換する
         phoneme_ids = np.array([p.phoneme_id for p in phonemes], dtype=np.int64)
@@ -263,8 +262,8 @@ def update_length(
             if mora.consonant is None:
                 mora.consonant_length = None
             else:
-                mora.consonant_length = phoneme_lengths[vowel_indexes[i + 1] - 1]
-            mora.vowel_length = phoneme_lengths[vowel_indexes[i + 1]]
+                mora.consonant_length = phoneme_lengths[vowel_indexes[i] - 1]
+            mora.vowel_length = phoneme_lengths[vowel_indexes[i]]
 
         return accent_phrases
 

From 320661b2c2d1ced0bd7266c8e0f3e12d911c7588 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 10 Jan 2024 06:16:47 +0900
Subject: [PATCH 138/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`.phoneme=5Fid`?=
 =?UTF-8?q?=20=E5=B1=9E=E6=80=A7=E3=83=AA=E3=83=8D=E3=83=BC=E3=83=A0=20(#1?=
 =?UTF-8?q?003)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

refactor: `.phoneme_id` 属性リネーム
---
 test/tts_pipeline/test_acoustic_feature_extractor.py       | 4 ++--
 test/tts_pipeline/test_tts_engine.py                       | 2 +-
 voicevox_engine/tts_pipeline/acoustic_feature_extractor.py | 4 ++--
 voicevox_engine/tts_pipeline/tts_engine.py                 | 7 +++----
 4 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/test/tts_pipeline/test_acoustic_feature_extractor.py b/test/tts_pipeline/test_acoustic_feature_extractor.py
index 6539541bf..4bf76267a 100644
--- a/test/tts_pipeline/test_acoustic_feature_extractor.py
+++ b/test/tts_pipeline/test_acoustic_feature_extractor.py
@@ -14,7 +14,7 @@ def test_unknown_phoneme():
 
     # Tests
     with pytest.raises(ValueError) as _:
-        _ = unknown_phoneme.phoneme_id
+        _ = unknown_phoneme.id
 
 
 class TestPhoneme(TestCase):
@@ -37,7 +37,7 @@ def test_convert(self):
         self.assertEqual(sil_phoneme.phoneme, "pau")
 
     def test_phoneme_id(self):
-        ojt_str_hello_hiho = " ".join([str(p.phoneme_id) for p in self.ojt_hello_hiho])
+        ojt_str_hello_hiho = " ".join([str(p.id) for p in self.ojt_hello_hiho])
         self.assertEqual(
             ojt_str_hello_hiho, "0 23 30 4 28 21 10 21 42 7 0 19 21 19 30 12 14 35 6 0"
         )
diff --git a/test/tts_pipeline/test_tts_engine.py b/test/tts_pipeline/test_tts_engine.py
index e7417792a..73598b5fe 100644
--- a/test/tts_pipeline/test_tts_engine.py
+++ b/test/tts_pipeline/test_tts_engine.py
@@ -261,7 +261,7 @@ def test_update_pitch(self):
         def result_value(i: int) -> float:
             # unvoiced_vowel_likesのPhoneme ID版
             unvoiced_mora_tail_ids = [
-                Phoneme(p).phoneme_id for p in UNVOICED_MORA_TAIL_PHONEMES
+                Phoneme(p).id for p in UNVOICED_MORA_TAIL_PHONEMES
             ]
             if vowel_phoneme_list[i] in unvoiced_mora_tail_ids:
                 return 0
diff --git a/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py b/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
index 76bba5357..3d0bfa6ad 100644
--- a/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
+++ b/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
@@ -120,7 +120,7 @@ def __init__(self, phoneme: str):
         # self.phoneme: Vowel | Consonant = phoneme
 
     @property
-    def phoneme_id(self) -> int:
+    def id(self) -> int:
         """音素ID (音素リスト内でのindex) を取得する"""
         return self._PHONEME_LIST.index(self.phoneme)
 
@@ -128,7 +128,7 @@ def phoneme_id(self) -> int:
     def onehot(self) -> NDArray[np.float32]:
         """音素onehotベクトルを取得する"""
         vec = np.zeros(self._NUM_PHONEME, dtype=np.float32)
-        vec[self.phoneme_id] = 1.0
+        vec[self.id] = 1.0
         return vec
 
     def is_mora_tail(self) -> bool:
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index b113f8444..610330bda 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -251,7 +251,7 @@ def update_length(
         phonemes = to_flatten_phonemes(moras)
 
         # 音素クラスから音素IDスカラへ表現を変換する
-        phoneme_ids = np.array([p.phoneme_id for p in phonemes], dtype=np.int64)
+        phoneme_ids = np.array([p.id for p in phonemes], dtype=np.int64)
 
         # コアを用いて音素長を生成する
         phoneme_lengths = self._core.safe_yukarin_s_forward(phoneme_ids, style_id)
@@ -306,12 +306,11 @@ def update_pitch(
 
         # モーラ系列から子音ID系列・母音ID系列を抽出する
         consonant_id_ints = [
-            Phoneme(mora.consonant).phoneme_id if mora.consonant else -1
-            for mora in moras
+            Phoneme(mora.consonant).id if mora.consonant else -1 for mora in moras
         ]
         consonant_ids = np.array(consonant_id_ints, dtype=np.int64)
         vowels = [Phoneme(mora.vowel) for mora in moras]
-        vowel_ids = np.array([p.phoneme_id for p in vowels], dtype=np.int64)
+        vowel_ids = np.array([p.id for p in vowels], dtype=np.int64)
 
         # コアを用いてモーラ音高を生成する
         f0 = self._core.safe_yukarin_sa_forward(

From 0dda8cca7fb0c7d9d935034da66c8bbd0f5ef74c Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Thu, 11 Jan 2024 14:20:19 +0900
Subject: [PATCH 139/177] =?UTF-8?q?=EF=BC=92=EF=BC=90=EF=BC=92=EF=BC=94?=
 =?UTF-8?q?=E5=B9=B4=EF=BC=91=E6=9C=88=E3=82=B9=E3=82=BF=E3=82=A4=E3=83=AB?=
 =?UTF-8?q?=E6=9B=B4=E6=96=B0=20(#1005)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/build-docker.yml       | 4 ++--
 .github/workflows/build.yml              | 4 ++--
 Dockerfile                               | 4 ++--
 engine_manifest_assets/update_infos.json | 7 +++++++
 4 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/build-docker.yml b/.github/workflows/build-docker.yml
index 41eb4ca05..36b5a66ae 100644
--- a/.github/workflows/build-docker.yml
+++ b/.github/workflows/build-docker.yml
@@ -15,8 +15,8 @@ on:
 env:
   IMAGE_NAME: ${{ secrets.DOCKERHUB_USERNAME }}/voicevox_engine
   PYTHON_VERSION: "3.8.10"
-  VOICEVOX_RESOURCE_VERSION: "0.14.4"
-  VOICEVOX_CORE_VERSION: "0.14.5"
+  VOICEVOX_RESOURCE_VERSION: "0.14.5"
+  VOICEVOX_CORE_VERSION: "0.14.6"
   VOICEVOX_ENGINE_VERSION:
     |- # releaseタグ名か、workflow_dispatchでのバージョン名か、latestが入る
     ${{ github.event.release.tag_name || github.event.inputs.version || 'latest' }}
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index bcffc4d51..5b9497926 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -21,8 +21,8 @@ on:
 
 env:
   PYTHON_VERSION: "3.8.10"
-  VOICEVOX_RESOURCE_VERSION: "0.14.4"
-  VOICEVOX_CORE_VERSION: "0.14.5"
+  VOICEVOX_RESOURCE_VERSION: "0.14.5"
+  VOICEVOX_CORE_VERSION: "0.14.6"
 
 jobs:
   config: # 全 jobs で利用する定数の定義. `env` が利用できないコンテキストでも利用できる.
diff --git a/Dockerfile b/Dockerfile
index c9f28b124..a3979706e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -22,7 +22,7 @@ EOF
 
 # assert VOICEVOX_CORE_VERSION >= 0.11.0 (ONNX)
 ARG VOICEVOX_CORE_ASSET_PREFIX=voicevox_core-linux-x64-cpu
-ARG VOICEVOX_CORE_VERSION=0.14.5
+ARG VOICEVOX_CORE_VERSION=0.14.6
 RUN <<EOF
     set -eux
 
@@ -238,7 +238,7 @@ RUN <<EOF
 EOF
 
 # Download Resource
-ARG VOICEVOX_RESOURCE_VERSION=0.14.4
+ARG VOICEVOX_RESOURCE_VERSION=0.14.5
 RUN <<EOF
     set -eux
 
diff --git a/engine_manifest_assets/update_infos.json b/engine_manifest_assets/update_infos.json
index 16c97c36d..b465a8b36 100644
--- a/engine_manifest_assets/update_infos.json
+++ b/engine_manifest_assets/update_infos.json
@@ -1,4 +1,11 @@
 [
+  {
+    "version": "0.14.7",
+    "descriptions": [
+      "キャラクター「小夜」「ずんだもん」「もち子さん」「青山龍星」のスタイルを追加・更新"
+    ],
+    "contributors": []
+  },
   {
     "version": "0.14.6",
     "descriptions": [

From 4fe753dcae8e63965420968ba87cf71ca8a79129 Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Fri, 19 Jan 2024 04:29:57 +0900
Subject: [PATCH 140/177] =?UTF-8?q?OpenAPI=E3=81=AE=E3=82=B9=E3=83=8A?=
 =?UTF-8?q?=E3=83=83=E3=83=97=E3=82=B7=E3=83=A7=E3=83=83=E3=83=88=E3=83=86?=
 =?UTF-8?q?=E3=82=B9=E3=83=88=E8=BF=BD=E5=8A=A0=EF=BC=86=E4=B8=8D=E8=A6=81?=
 =?UTF-8?q?=E3=81=AAQuery=E3=82=92=E7=9C=81=E3=81=8F=20(#992)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* 不要なQueryを省く＆OpenAPIのスナップショットテスト追加

* コメント追加

* lint

* update openapi.json
---
 run.py                                        |   24 +-
 ...\343\202\222\347\242\272\350\252\215.json" | 2957 +++++++++++++++++
 test/e2e/conftest.py                          |   10 +-
 test/e2e/test_openapi.py                      |   12 +
 4 files changed, 2989 insertions(+), 14 deletions(-)
 create mode 100644 "test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
 create mode 100644 test/e2e/test_openapi.py

diff --git a/run.py b/run.py
index 7041028cb..1f2c0907a 100644
--- a/run.py
+++ b/run.py
@@ -290,7 +290,7 @@ def get_core(core_version: Optional[str]) -> CoreAdapter:
     )
     def audio_query(
         text: str,
-        style_id: StyleId | None = Query(default=None),  # noqa: B008
+        style_id: StyleId | None = None,
         speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
         core_version: str | None = None,
     ) -> AudioQuery:
@@ -371,7 +371,7 @@ def audio_query_from_preset(
     )
     def accent_phrases(
         text: str,
-        style_id: StyleId | None = Query(default=None),  # noqa: B008
+        style_id: StyleId | None = None,
         speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
         is_kana: bool = False,
         core_version: str | None = None,
@@ -407,7 +407,7 @@ def accent_phrases(
     )
     def mora_data(
         accent_phrases: list[AccentPhrase],
-        style_id: StyleId | None = Query(default=None),  # noqa: B008
+        style_id: StyleId | None = None,
         speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
         core_version: str | None = None,
     ) -> list[AccentPhrase]:
@@ -425,7 +425,7 @@ def mora_data(
     )
     def mora_length(
         accent_phrases: list[AccentPhrase],
-        style_id: StyleId | None = Query(default=None),  # noqa: B008
+        style_id: StyleId | None = None,
         speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
         core_version: str | None = None,
     ) -> list[AccentPhrase]:
@@ -443,7 +443,7 @@ def mora_length(
     )
     def mora_pitch(
         accent_phrases: list[AccentPhrase],
-        style_id: StyleId | None = Query(default=None),  # noqa: B008
+        style_id: StyleId | None = None,
         speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
         core_version: str | None = None,
     ) -> list[AccentPhrase]:
@@ -468,7 +468,7 @@ def mora_pitch(
     )
     def synthesis(
         query: AudioQuery,
-        style_id: StyleId | None = Query(default=None),  # noqa: B008
+        style_id: StyleId | None = None,
         speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
         enable_interrogative_upspeak: bool = Query(  # noqa: B008
             default=True,
@@ -511,7 +511,7 @@ def synthesis(
     def cancellable_synthesis(
         query: AudioQuery,
         request: Request,
-        style_id: StyleId | None = Query(default=None),  # noqa: B008
+        style_id: StyleId | None = None,
         speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
         core_version: str | None = None,
     ) -> FileResponse:
@@ -552,7 +552,7 @@ def cancellable_synthesis(
     )
     def multi_synthesis(
         queries: list[AudioQuery],
-        style_id: StyleId | None = Query(default=None),  # noqa: B008
+        style_id: StyleId | None = None,
         speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
         core_version: str | None = None,
     ) -> FileResponse:
@@ -634,11 +634,11 @@ def morphable_targets(
     )
     def _synthesis_morphing(
         query: AudioQuery,
-        base_style_id: StyleId | None = Query(default=None),  # noqa: B008
+        base_style_id: StyleId | None = None,
         base_speaker: (StyleId | None) = Query(  # noqa: B008
             default=None, deprecated=True
         ),
-        target_style_id: StyleId | None = Query(default=None),  # noqa: B008
+        target_style_id: StyleId | None = None,
         target_speaker: (StyleId | None) = Query(  # noqa: B008
             default=None, deprecated=True
         ),
@@ -1032,7 +1032,7 @@ def uninstall_library(library_uuid: str) -> Response:
     def initialize_style_id(
         style_id: StyleId,
         skip_reinit: bool = Query(  # noqa: B008
-            False, description="既に初期化済みのスタイルの再初期化をスキップするかどうか"
+            default=False, description="既に初期化済みのスタイルの再初期化をスキップするかどうか"
         ),
         core_version: str | None = None,
     ) -> Response:
@@ -1059,7 +1059,7 @@ def is_initialized_style_id(
     def initialize_speaker(
         speaker: StyleId,
         skip_reinit: bool = Query(  # noqa: B008
-            False, description="既に初期化済みの話者の再初期化をスキップするかどうか"
+            default=False, description="既に初期化済みの話者の再初期化をスキップするかどうか"
         ),
         core_version: str | None = None,
     ) -> Response:
diff --git "a/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json" "b/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
new file mode 100644
index 000000000..0cf9b7443
--- /dev/null
+++ "b/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
@@ -0,0 +1,2957 @@
+{
+  "components": {
+    "schemas": {
+      "AccentPhrase": {
+        "description": "アクセント句ごとの情報",
+        "properties": {
+          "accent": {
+            "title": "アクセント箇所",
+            "type": "integer"
+          },
+          "is_interrogative": {
+            "default": false,
+            "title": "疑問系かどうか",
+            "type": "boolean"
+          },
+          "moras": {
+            "items": {
+              "$ref": "#/components/schemas/Mora"
+            },
+            "title": "モーラのリスト",
+            "type": "array"
+          },
+          "pause_mora": {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/Mora"
+              }
+            ],
+            "title": "後ろに無音を付けるかどうか"
+          }
+        },
+        "required": [
+          "moras",
+          "accent"
+        ],
+        "title": "AccentPhrase",
+        "type": "object"
+      },
+      "AudioQuery": {
+        "description": "音声合成用のクエリ",
+        "properties": {
+          "accent_phrases": {
+            "items": {
+              "$ref": "#/components/schemas/AccentPhrase"
+            },
+            "title": "アクセント句のリスト",
+            "type": "array"
+          },
+          "intonationScale": {
+            "title": "全体の抑揚",
+            "type": "number"
+          },
+          "kana": {
+            "title": "[読み取り専用]AquesTalk 風記法によるテキスト。音声合成用のクエリとしては無視される",
+            "type": "string"
+          },
+          "outputSamplingRate": {
+            "title": "音声データの出力サンプリングレート",
+            "type": "integer"
+          },
+          "outputStereo": {
+            "title": "音声データをステレオ出力するか否か",
+            "type": "boolean"
+          },
+          "pitchScale": {
+            "title": "全体の音高",
+            "type": "number"
+          },
+          "postPhonemeLength": {
+            "title": "音声の後の無音時間",
+            "type": "number"
+          },
+          "prePhonemeLength": {
+            "title": "音声の前の無音時間",
+            "type": "number"
+          },
+          "speedScale": {
+            "title": "全体の話速",
+            "type": "number"
+          },
+          "volumeScale": {
+            "title": "全体の音量",
+            "type": "number"
+          }
+        },
+        "required": [
+          "accent_phrases",
+          "speedScale",
+          "pitchScale",
+          "intonationScale",
+          "volumeScale",
+          "prePhonemeLength",
+          "postPhonemeLength",
+          "outputSamplingRate",
+          "outputStereo"
+        ],
+        "title": "AudioQuery",
+        "type": "object"
+      },
+      "BaseLibraryInfo": {
+        "description": "音声ライブラリの情報",
+        "properties": {
+          "bytes": {
+            "title": "音声ライブラリのバイト数",
+            "type": "integer"
+          },
+          "download_url": {
+            "title": "音声ライブラリのダウンロードURL",
+            "type": "string"
+          },
+          "name": {
+            "title": "音声ライブラリの名前",
+            "type": "string"
+          },
+          "speakers": {
+            "items": {
+              "$ref": "#/components/schemas/LibrarySpeaker"
+            },
+            "title": "音声ライブラリに含まれる話者のリスト",
+            "type": "array"
+          },
+          "uuid": {
+            "title": "音声ライブラリのUUID",
+            "type": "string"
+          },
+          "version": {
+            "title": "音声ライブラリのバージョン",
+            "type": "string"
+          }
+        },
+        "required": [
+          "name",
+          "uuid",
+          "version",
+          "download_url",
+          "bytes",
+          "speakers"
+        ],
+        "title": "BaseLibraryInfo",
+        "type": "object"
+      },
+      "Body_setting_post_setting_post": {
+        "properties": {
+          "allow_origin": {
+            "title": "Allow Origin",
+            "type": "string"
+          },
+          "cors_policy_mode": {
+            "$ref": "#/components/schemas/CorsPolicyMode"
+          }
+        },
+        "required": [
+          "cors_policy_mode"
+        ],
+        "title": "Body_setting_post_setting_post",
+        "type": "object"
+      },
+      "CorsPolicyMode": {
+        "description": "CORSの許可モード",
+        "enum": [
+          "all",
+          "localapps"
+        ],
+        "title": "CorsPolicyMode",
+        "type": "string"
+      },
+      "DownloadableLibraryInfo": {
+        "description": "ダウンロード可能な音声ライブラリの情報",
+        "properties": {
+          "bytes": {
+            "title": "音声ライブラリのバイト数",
+            "type": "integer"
+          },
+          "download_url": {
+            "title": "音声ライブラリのダウンロードURL",
+            "type": "string"
+          },
+          "name": {
+            "title": "音声ライブラリの名前",
+            "type": "string"
+          },
+          "speakers": {
+            "items": {
+              "$ref": "#/components/schemas/LibrarySpeaker"
+            },
+            "title": "音声ライブラリに含まれる話者のリスト",
+            "type": "array"
+          },
+          "uuid": {
+            "title": "音声ライブラリのUUID",
+            "type": "string"
+          },
+          "version": {
+            "title": "音声ライブラリのバージョン",
+            "type": "string"
+          }
+        },
+        "required": [
+          "name",
+          "uuid",
+          "version",
+          "download_url",
+          "bytes",
+          "speakers"
+        ],
+        "title": "DownloadableLibraryInfo",
+        "type": "object"
+      },
+      "EngineManifest": {
+        "description": "エンジン自体に関する情報",
+        "properties": {
+          "brand_name": {
+            "title": "ブランド名",
+            "type": "string"
+          },
+          "default_sampling_rate": {
+            "title": "デフォルトのサンプリング周波数",
+            "type": "integer"
+          },
+          "dependency_licenses": {
+            "items": {
+              "$ref": "#/components/schemas/LicenseInfo"
+            },
+            "title": "依存関係のライセンス情報",
+            "type": "array"
+          },
+          "icon": {
+            "title": "エンジンのアイコンをBASE64エンコードしたもの",
+            "type": "string"
+          },
+          "manifest_version": {
+            "title": "マニフェストのバージョン",
+            "type": "string"
+          },
+          "name": {
+            "title": "エンジン名",
+            "type": "string"
+          },
+          "supported_features": {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/SupportedFeatures"
+              }
+            ],
+            "title": "エンジンが持つ機能"
+          },
+          "supported_vvlib_manifest_version": {
+            "title": "エンジンが対応するvvlibのバージョン",
+            "type": "string"
+          },
+          "terms_of_service": {
+            "title": "エンジンの利用規約",
+            "type": "string"
+          },
+          "update_infos": {
+            "items": {
+              "$ref": "#/components/schemas/UpdateInfo"
+            },
+            "title": "エンジンのアップデート情報",
+            "type": "array"
+          },
+          "url": {
+            "title": "エンジンのURL",
+            "type": "string"
+          },
+          "uuid": {
+            "title": "エンジンのUUID",
+            "type": "string"
+          }
+        },
+        "required": [
+          "manifest_version",
+          "name",
+          "brand_name",
+          "uuid",
+          "url",
+          "icon",
+          "default_sampling_rate",
+          "terms_of_service",
+          "update_infos",
+          "dependency_licenses",
+          "supported_features"
+        ],
+        "title": "EngineManifest",
+        "type": "object"
+      },
+      "HTTPValidationError": {
+        "properties": {
+          "detail": {
+            "items": {
+              "$ref": "#/components/schemas/ValidationError"
+            },
+            "title": "Detail",
+            "type": "array"
+          }
+        },
+        "title": "HTTPValidationError",
+        "type": "object"
+      },
+      "InstalledLibraryInfo": {
+        "description": "インストール済み音声ライブラリの情報",
+        "properties": {
+          "bytes": {
+            "title": "音声ライブラリのバイト数",
+            "type": "integer"
+          },
+          "download_url": {
+            "title": "音声ライブラリのダウンロードURL",
+            "type": "string"
+          },
+          "name": {
+            "title": "音声ライブラリの名前",
+            "type": "string"
+          },
+          "speakers": {
+            "items": {
+              "$ref": "#/components/schemas/LibrarySpeaker"
+            },
+            "title": "音声ライブラリに含まれる話者のリスト",
+            "type": "array"
+          },
+          "uninstallable": {
+            "title": "アンインストール可能かどうか",
+            "type": "boolean"
+          },
+          "uuid": {
+            "title": "音声ライブラリのUUID",
+            "type": "string"
+          },
+          "version": {
+            "title": "音声ライブラリのバージョン",
+            "type": "string"
+          }
+        },
+        "required": [
+          "name",
+          "uuid",
+          "version",
+          "download_url",
+          "bytes",
+          "speakers",
+          "uninstallable"
+        ],
+        "title": "InstalledLibraryInfo",
+        "type": "object"
+      },
+      "LibrarySpeaker": {
+        "description": "音声ライブラリに含まれる話者の情報",
+        "properties": {
+          "speaker": {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/Speaker"
+              }
+            ],
+            "title": "話者情報"
+          },
+          "speaker_info": {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/SpeakerInfo"
+              }
+            ],
+            "title": "話者の追加情報"
+          }
+        },
+        "required": [
+          "speaker",
+          "speaker_info"
+        ],
+        "title": "LibrarySpeaker",
+        "type": "object"
+      },
+      "LicenseInfo": {
+        "description": "依存ライブラリのライセンス情報",
+        "properties": {
+          "license": {
+            "title": "依存ライブラリのライセンス名",
+            "type": "string"
+          },
+          "name": {
+            "title": "依存ライブラリ名",
+            "type": "string"
+          },
+          "text": {
+            "title": "依存ライブラリのライセンス本文",
+            "type": "string"
+          },
+          "version": {
+            "title": "依存ライブラリのバージョン",
+            "type": "string"
+          }
+        },
+        "required": [
+          "name",
+          "text"
+        ],
+        "title": "LicenseInfo",
+        "type": "object"
+      },
+      "Mora": {
+        "description": "モーラ（子音＋母音）ごとの情報",
+        "properties": {
+          "consonant": {
+            "title": "子音の音素",
+            "type": "string"
+          },
+          "consonant_length": {
+            "title": "子音の音長",
+            "type": "number"
+          },
+          "pitch": {
+            "title": "音高",
+            "type": "number"
+          },
+          "text": {
+            "title": "文字",
+            "type": "string"
+          },
+          "vowel": {
+            "title": "母音の音素",
+            "type": "string"
+          },
+          "vowel_length": {
+            "title": "母音の音長",
+            "type": "number"
+          }
+        },
+        "required": [
+          "text",
+          "vowel",
+          "vowel_length",
+          "pitch"
+        ],
+        "title": "Mora",
+        "type": "object"
+      },
+      "MorphableTargetInfo": {
+        "properties": {
+          "is_morphable": {
+            "title": "指定した話者に対してモーフィングの可否",
+            "type": "boolean"
+          }
+        },
+        "required": [
+          "is_morphable"
+        ],
+        "title": "MorphableTargetInfo",
+        "type": "object"
+      },
+      "ParseKanaBadRequest": {
+        "properties": {
+          "error_args": {
+            "additionalProperties": {
+              "type": "string"
+            },
+            "title": "エラーを起こした箇所",
+            "type": "object"
+          },
+          "error_name": {
+            "description": "|name|description|\n|---|---|\n| UNKNOWN_TEXT | 判別できない読み仮名があります: {text} |\n| ACCENT_TOP | 句頭にアクセントは置けません: {text} |\n| ACCENT_TWICE | 1つのアクセント句に二つ以上のアクセントは置けません: {text} |\n| ACCENT_NOTFOUND | アクセントを指定していないアクセント句があります: {text} |\n| EMPTY_PHRASE | {position}番目のアクセント句が空白です |\n| INTERROGATION_MARK_NOT_AT_END | アクセント句末以外に「？」は置けません: {text} |\n| INFINITE_LOOP | 処理時に無限ループになってしまいました...バグ報告をお願いします。 |",
+            "title": "エラー名",
+            "type": "string"
+          },
+          "text": {
+            "title": "エラーメッセージ",
+            "type": "string"
+          }
+        },
+        "required": [
+          "text",
+          "error_name",
+          "error_args"
+        ],
+        "title": "ParseKanaBadRequest",
+        "type": "object"
+      },
+      "Preset": {
+        "description": "プリセット情報",
+        "properties": {
+          "id": {
+            "title": "プリセットID",
+            "type": "integer"
+          },
+          "intonationScale": {
+            "title": "全体の抑揚",
+            "type": "number"
+          },
+          "name": {
+            "title": "プリセット名",
+            "type": "string"
+          },
+          "pitchScale": {
+            "title": "全体の音高",
+            "type": "number"
+          },
+          "postPhonemeLength": {
+            "title": "音声の後の無音時間",
+            "type": "number"
+          },
+          "prePhonemeLength": {
+            "title": "音声の前の無音時間",
+            "type": "number"
+          },
+          "speaker_uuid": {
+            "title": "話者のUUID",
+            "type": "string"
+          },
+          "speedScale": {
+            "title": "全体の話速",
+            "type": "number"
+          },
+          "style_id": {
+            "title": "スタイルID",
+            "type": "integer"
+          },
+          "volumeScale": {
+            "title": "全体の音量",
+            "type": "number"
+          }
+        },
+        "required": [
+          "id",
+          "name",
+          "speaker_uuid",
+          "style_id",
+          "speedScale",
+          "pitchScale",
+          "intonationScale",
+          "volumeScale",
+          "prePhonemeLength",
+          "postPhonemeLength"
+        ],
+        "title": "Preset",
+        "type": "object"
+      },
+      "Speaker": {
+        "description": "話者情報",
+        "properties": {
+          "name": {
+            "title": "名前",
+            "type": "string"
+          },
+          "speaker_uuid": {
+            "title": "話者のUUID",
+            "type": "string"
+          },
+          "styles": {
+            "items": {
+              "$ref": "#/components/schemas/SpeakerStyle"
+            },
+            "title": "スタイルの一覧",
+            "type": "array"
+          },
+          "supported_features": {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/SpeakerSupportedFeatures"
+              }
+            ],
+            "title": "話者の対応機能"
+          },
+          "version": {
+            "default": "話者のバージョン",
+            "title": "Version",
+            "type": "string"
+          }
+        },
+        "required": [
+          "name",
+          "speaker_uuid",
+          "styles"
+        ],
+        "title": "Speaker",
+        "type": "object"
+      },
+      "SpeakerInfo": {
+        "description": "話者の追加情報",
+        "properties": {
+          "policy": {
+            "title": "policy.md",
+            "type": "string"
+          },
+          "portrait": {
+            "title": "portrait.pngをbase64エンコードしたもの",
+            "type": "string"
+          },
+          "style_infos": {
+            "items": {
+              "$ref": "#/components/schemas/StyleInfo"
+            },
+            "title": "スタイルの追加情報",
+            "type": "array"
+          }
+        },
+        "required": [
+          "policy",
+          "portrait",
+          "style_infos"
+        ],
+        "title": "SpeakerInfo",
+        "type": "object"
+      },
+      "SpeakerStyle": {
+        "description": "話者のスタイル情報",
+        "properties": {
+          "id": {
+            "title": "スタイルID",
+            "type": "integer"
+          },
+          "name": {
+            "title": "スタイル名",
+            "type": "string"
+          }
+        },
+        "required": [
+          "name",
+          "id"
+        ],
+        "title": "SpeakerStyle",
+        "type": "object"
+      },
+      "SpeakerSupportPermittedSynthesisMorphing": {
+        "description": "An enumeration.",
+        "enum": [
+          "ALL",
+          "SELF_ONLY",
+          "NOTHING"
+        ],
+        "title": "SpeakerSupportPermittedSynthesisMorphing",
+        "type": "string"
+      },
+      "SpeakerSupportedFeatures": {
+        "description": "話者の対応機能の情報",
+        "properties": {
+          "permitted_synthesis_morphing": {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/SpeakerSupportPermittedSynthesisMorphing"
+              }
+            ],
+            "default": "ALL",
+            "title": "モーフィング機能への対応"
+          }
+        },
+        "title": "SpeakerSupportedFeatures",
+        "type": "object"
+      },
+      "StyleInfo": {
+        "description": "スタイルの追加情報",
+        "properties": {
+          "icon": {
+            "title": "当該スタイルのアイコンをbase64エンコードしたもの",
+            "type": "string"
+          },
+          "id": {
+            "title": "スタイルID",
+            "type": "integer"
+          },
+          "portrait": {
+            "title": "当該スタイルのportrait.pngをbase64エンコードしたもの",
+            "type": "string"
+          },
+          "voice_samples": {
+            "items": {
+              "type": "string"
+            },
+            "title": "voice_sampleのwavファイルをbase64エンコードしたもの",
+            "type": "array"
+          }
+        },
+        "required": [
+          "id",
+          "icon",
+          "voice_samples"
+        ],
+        "title": "StyleInfo",
+        "type": "object"
+      },
+      "SupportedDevicesInfo": {
+        "description": "対応しているデバイスの情報",
+        "properties": {
+          "cpu": {
+            "title": "CPUに対応しているか",
+            "type": "boolean"
+          },
+          "cuda": {
+            "title": "CUDA(Nvidia GPU)に対応しているか",
+            "type": "boolean"
+          },
+          "dml": {
+            "title": "DirectML(Nvidia GPU/Radeon GPU等)に対応しているか",
+            "type": "boolean"
+          }
+        },
+        "required": [
+          "cpu",
+          "cuda",
+          "dml"
+        ],
+        "title": "SupportedDevicesInfo",
+        "type": "object"
+      },
+      "SupportedFeatures": {
+        "description": "エンジンが持つ機能の一覧",
+        "properties": {
+          "adjust_intonation_scale": {
+            "title": "全体の抑揚の調整",
+            "type": "boolean"
+          },
+          "adjust_mora_pitch": {
+            "title": "モーラごとの音高の調整",
+            "type": "boolean"
+          },
+          "adjust_phoneme_length": {
+            "title": "音素ごとの長さの調整",
+            "type": "boolean"
+          },
+          "adjust_pitch_scale": {
+            "title": "全体の音高の調整",
+            "type": "boolean"
+          },
+          "adjust_speed_scale": {
+            "title": "全体の話速の調整",
+            "type": "boolean"
+          },
+          "adjust_volume_scale": {
+            "title": "全体の音量の調整",
+            "type": "boolean"
+          },
+          "interrogative_upspeak": {
+            "title": "疑問文の自動調整",
+            "type": "boolean"
+          },
+          "manage_library": {
+            "title": "音声ライブラリのインストール・アンインストール",
+            "type": "boolean"
+          },
+          "synthesis_morphing": {
+            "title": "2種類のスタイルでモーフィングした音声を合成",
+            "type": "boolean"
+          }
+        },
+        "required": [
+          "adjust_mora_pitch",
+          "adjust_phoneme_length",
+          "adjust_speed_scale",
+          "adjust_pitch_scale",
+          "adjust_intonation_scale",
+          "adjust_volume_scale",
+          "interrogative_upspeak",
+          "synthesis_morphing"
+        ],
+        "title": "SupportedFeatures",
+        "type": "object"
+      },
+      "UpdateInfo": {
+        "description": "エンジンのアップデート情報",
+        "properties": {
+          "contributors": {
+            "items": {
+              "type": "string"
+            },
+            "title": "貢献者名",
+            "type": "array"
+          },
+          "descriptions": {
+            "items": {
+              "type": "string"
+            },
+            "title": "アップデートの詳細についての説明",
+            "type": "array"
+          },
+          "version": {
+            "title": "エンジンのバージョン名",
+            "type": "string"
+          }
+        },
+        "required": [
+          "version",
+          "descriptions"
+        ],
+        "title": "UpdateInfo",
+        "type": "object"
+      },
+      "UserDictWord": {
+        "description": "辞書のコンパイルに使われる情報",
+        "properties": {
+          "accent_associative_rule": {
+            "title": "アクセント結合規則",
+            "type": "string"
+          },
+          "accent_type": {
+            "title": "アクセント型",
+            "type": "integer"
+          },
+          "context_id": {
+            "default": 1348,
+            "title": "文脈ID",
+            "type": "integer"
+          },
+          "inflectional_form": {
+            "title": "活用形",
+            "type": "string"
+          },
+          "inflectional_type": {
+            "title": "活用型",
+            "type": "string"
+          },
+          "mora_count": {
+            "title": "モーラ数",
+            "type": "integer"
+          },
+          "part_of_speech": {
+            "title": "品詞",
+            "type": "string"
+          },
+          "part_of_speech_detail_1": {
+            "title": "品詞細分類1",
+            "type": "string"
+          },
+          "part_of_speech_detail_2": {
+            "title": "品詞細分類2",
+            "type": "string"
+          },
+          "part_of_speech_detail_3": {
+            "title": "品詞細分類3",
+            "type": "string"
+          },
+          "priority": {
+            "maximum": 10.0,
+            "minimum": 0.0,
+            "title": "優先度",
+            "type": "integer"
+          },
+          "pronunciation": {
+            "title": "発音",
+            "type": "string"
+          },
+          "stem": {
+            "title": "原形",
+            "type": "string"
+          },
+          "surface": {
+            "title": "表層形",
+            "type": "string"
+          },
+          "yomi": {
+            "title": "読み",
+            "type": "string"
+          }
+        },
+        "required": [
+          "surface",
+          "priority",
+          "part_of_speech",
+          "part_of_speech_detail_1",
+          "part_of_speech_detail_2",
+          "part_of_speech_detail_3",
+          "inflectional_type",
+          "inflectional_form",
+          "stem",
+          "yomi",
+          "pronunciation",
+          "accent_type",
+          "accent_associative_rule"
+        ],
+        "title": "UserDictWord",
+        "type": "object"
+      },
+      "ValidationError": {
+        "properties": {
+          "loc": {
+            "items": {
+              "anyOf": [
+                {
+                  "type": "string"
+                },
+                {
+                  "type": "integer"
+                }
+              ]
+            },
+            "title": "Location",
+            "type": "array"
+          },
+          "msg": {
+            "title": "Message",
+            "type": "string"
+          },
+          "type": {
+            "title": "Error Type",
+            "type": "string"
+          }
+        },
+        "required": [
+          "loc",
+          "msg",
+          "type"
+        ],
+        "title": "ValidationError",
+        "type": "object"
+      },
+      "VvlibManifest": {
+        "description": "vvlib(VOICEVOX Library)に関する情報",
+        "properties": {
+          "brand_name": {
+            "title": "エンジンのブランド名",
+            "type": "string"
+          },
+          "engine_name": {
+            "title": "エンジン名",
+            "type": "string"
+          },
+          "engine_uuid": {
+            "title": "エンジンのUUID",
+            "type": "string"
+          },
+          "manifest_version": {
+            "title": "マニフェストバージョン",
+            "type": "string"
+          },
+          "name": {
+            "title": "音声ライブラリ名",
+            "type": "string"
+          },
+          "uuid": {
+            "title": "音声ライブラリのUUID",
+            "type": "string"
+          },
+          "version": {
+            "title": "音声ライブラリバージョン",
+            "type": "string"
+          }
+        },
+        "required": [
+          "manifest_version",
+          "name",
+          "version",
+          "uuid",
+          "brand_name",
+          "engine_name",
+          "engine_uuid"
+        ],
+        "title": "VvlibManifest",
+        "type": "object"
+      },
+      "WordTypes": {
+        "description": "fastapiでword_type引数を検証する時に使用するクラス",
+        "enum": [
+          "PROPER_NOUN",
+          "COMMON_NOUN",
+          "VERB",
+          "ADJECTIVE",
+          "SUFFIX"
+        ],
+        "title": "WordTypes",
+        "type": "string"
+      }
+    }
+  },
+  "info": {
+    "description": "VOICEVOXの音声合成エンジンです。",
+    "title": "VOICEVOX Engine",
+    "version": "latest"
+  },
+  "openapi": "3.1.0",
+  "paths": {
+    "/accent_phrases": {
+      "post": {
+        "description": "テキストからアクセント句を得ます。\nis_kanaが`true`のとき、テキストは次のAquesTalk 風記法で解釈されます。デフォルトは`false`です。\n* 全てのカナはカタカナで記述される\n* アクセント句は`/`または`、`で区切る。`、`で区切った場合に限り無音区間が挿入される。\n* カナの手前に`_`を入れるとそのカナは無声化される\n* アクセント位置を`'`で指定する。全てのアクセント句にはアクセント位置を1つ指定する必要がある。\n* アクセント句末に`？`(全角)を入れることにより疑問文の発音ができる。",
+        "operationId": "accent_phrases_accent_phrases_post",
+        "parameters": [
+          {
+            "in": "query",
+            "name": "text",
+            "required": true,
+            "schema": {
+              "title": "Text",
+              "type": "string"
+            }
+          },
+          {
+            "in": "query",
+            "name": "style_id",
+            "required": false,
+            "schema": {
+              "title": "Style Id",
+              "type": "integer"
+            }
+          },
+          {
+            "deprecated": true,
+            "in": "query",
+            "name": "speaker",
+            "required": false,
+            "schema": {
+              "title": "Speaker",
+              "type": "integer"
+            }
+          },
+          {
+            "in": "query",
+            "name": "is_kana",
+            "required": false,
+            "schema": {
+              "default": false,
+              "title": "Is Kana",
+              "type": "boolean"
+            }
+          },
+          {
+            "in": "query",
+            "name": "core_version",
+            "required": false,
+            "schema": {
+              "title": "Core Version",
+              "type": "string"
+            }
+          }
+        ],
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "items": {
+                    "$ref": "#/components/schemas/AccentPhrase"
+                  },
+                  "title": "Response Accent Phrases Accent Phrases Post",
+                  "type": "array"
+                }
+              }
+            },
+            "description": "Successful Response"
+          },
+          "400": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ParseKanaBadRequest"
+                }
+              }
+            },
+            "description": "読み仮名のパースに失敗"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "テキストからアクセント句を得る",
+        "tags": [
+          "クエリ編集"
+        ]
+      }
+    },
+    "/add_preset": {
+      "post": {
+        "description": "新しいプリセットを追加します\n\nParameters\n-------\npreset: Preset\n    新しいプリセット。\n    プリセットIDが既存のものと重複している場合は、新規のプリセットIDが採番されます。\n\nReturns\n-------\nid: int\n    追加したプリセットのプリセットID",
+        "operationId": "add_preset_add_preset_post",
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/Preset"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "title": "Response Add Preset Add Preset Post",
+                  "type": "integer"
+                }
+              }
+            },
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "Add Preset",
+        "tags": [
+          "その他"
+        ]
+      }
+    },
+    "/audio_query": {
+      "post": {
+        "description": "音声合成用のクエリの初期値を得ます。ここで得られたクエリはそのまま音声合成に利用できます。各値の意味は`Schemas`を参照してください。",
+        "operationId": "audio_query_audio_query_post",
+        "parameters": [
+          {
+            "in": "query",
+            "name": "text",
+            "required": true,
+            "schema": {
+              "title": "Text",
+              "type": "string"
+            }
+          },
+          {
+            "in": "query",
+            "name": "style_id",
+            "required": false,
+            "schema": {
+              "title": "Style Id",
+              "type": "integer"
+            }
+          },
+          {
+            "deprecated": true,
+            "in": "query",
+            "name": "speaker",
+            "required": false,
+            "schema": {
+              "title": "Speaker",
+              "type": "integer"
+            }
+          },
+          {
+            "in": "query",
+            "name": "core_version",
+            "required": false,
+            "schema": {
+              "title": "Core Version",
+              "type": "string"
+            }
+          }
+        ],
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/AudioQuery"
+                }
+              }
+            },
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "音声合成用のクエリを作成する",
+        "tags": [
+          "クエリ作成"
+        ]
+      }
+    },
+    "/audio_query_from_preset": {
+      "post": {
+        "description": "音声合成用のクエリの初期値を得ます。ここで得られたクエリはそのまま音声合成に利用できます。各値の意味は`Schemas`を参照してください。",
+        "operationId": "audio_query_from_preset_audio_query_from_preset_post",
+        "parameters": [
+          {
+            "in": "query",
+            "name": "text",
+            "required": true,
+            "schema": {
+              "title": "Text",
+              "type": "string"
+            }
+          },
+          {
+            "in": "query",
+            "name": "preset_id",
+            "required": true,
+            "schema": {
+              "title": "Preset Id",
+              "type": "integer"
+            }
+          },
+          {
+            "in": "query",
+            "name": "core_version",
+            "required": false,
+            "schema": {
+              "title": "Core Version",
+              "type": "string"
+            }
+          }
+        ],
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/AudioQuery"
+                }
+              }
+            },
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "音声合成用のクエリをプリセットを用いて作成する",
+        "tags": [
+          "クエリ作成"
+        ]
+      }
+    },
+    "/cancellable_synthesis": {
+      "post": {
+        "operationId": "cancellable_synthesis_cancellable_synthesis_post",
+        "parameters": [
+          {
+            "in": "query",
+            "name": "style_id",
+            "required": false,
+            "schema": {
+              "title": "Style Id",
+              "type": "integer"
+            }
+          },
+          {
+            "deprecated": true,
+            "in": "query",
+            "name": "speaker",
+            "required": false,
+            "schema": {
+              "title": "Speaker",
+              "type": "integer"
+            }
+          },
+          {
+            "in": "query",
+            "name": "core_version",
+            "required": false,
+            "schema": {
+              "title": "Core Version",
+              "type": "string"
+            }
+          }
+        ],
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/AudioQuery"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "content": {
+              "audio/wav": {
+                "schema": {
+                  "format": "binary",
+                  "type": "string"
+                }
+              }
+            },
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "音声合成する（キャンセル可能）",
+        "tags": [
+          "音声合成"
+        ]
+      }
+    },
+    "/connect_waves": {
+      "post": {
+        "description": "base64エンコードされたwavデータを一纏めにし、wavファイルで返します。",
+        "operationId": "connect_waves_connect_waves_post",
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "items": {
+                  "type": "string"
+                },
+                "title": "Waves",
+                "type": "array"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "content": {
+              "audio/wav": {
+                "schema": {
+                  "format": "binary",
+                  "type": "string"
+                }
+              }
+            },
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "base64エンコードされた複数のwavデータを一つに結合する",
+        "tags": [
+          "その他"
+        ]
+      }
+    },
+    "/core_versions": {
+      "get": {
+        "operationId": "core_versions_core_versions_get",
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "items": {
+                    "type": "string"
+                  },
+                  "title": "Response Core Versions Core Versions Get",
+                  "type": "array"
+                }
+              }
+            },
+            "description": "Successful Response"
+          }
+        },
+        "summary": "Core Versions",
+        "tags": [
+          "その他"
+        ]
+      }
+    },
+    "/delete_preset": {
+      "post": {
+        "description": "既存のプリセットを削除します\n\nParameters\n-------\nid: int\n    削除するプリセットのプリセットID",
+        "operationId": "delete_preset_delete_preset_post",
+        "parameters": [
+          {
+            "in": "query",
+            "name": "id",
+            "required": true,
+            "schema": {
+              "title": "Id",
+              "type": "integer"
+            }
+          }
+        ],
+        "responses": {
+          "204": {
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "Delete Preset",
+        "tags": [
+          "その他"
+        ]
+      }
+    },
+    "/downloadable_libraries": {
+      "get": {
+        "description": "ダウンロード可能な音声ライブラリの情報を返します。\n\nReturns\n-------\nret_data: list[DownloadableLibrary]",
+        "operationId": "downloadable_libraries_downloadable_libraries_get",
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "items": {
+                    "$ref": "#/components/schemas/DownloadableLibraryInfo"
+                  },
+                  "title": "Response Downloadable Libraries Downloadable Libraries Get",
+                  "type": "array"
+                }
+              }
+            },
+            "description": "Successful Response"
+          }
+        },
+        "summary": "Downloadable Libraries",
+        "tags": [
+          "音声ライブラリ管理"
+        ]
+      }
+    },
+    "/engine_manifest": {
+      "get": {
+        "operationId": "engine_manifest_engine_manifest_get",
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/EngineManifest"
+                }
+              }
+            },
+            "description": "Successful Response"
+          }
+        },
+        "summary": "Engine Manifest",
+        "tags": [
+          "その他"
+        ]
+      }
+    },
+    "/import_user_dict": {
+      "post": {
+        "description": "他のユーザー辞書をインポートします。\n\nParameters\n----------\nimport_dict_data: dict[str, UserDictWord]\n    インポートするユーザー辞書のデータ\noverride: bool\n    重複したエントリがあった場合、上書きするかどうか",
+        "operationId": "import_user_dict_words_import_user_dict_post",
+        "parameters": [
+          {
+            "in": "query",
+            "name": "override",
+            "required": true,
+            "schema": {
+              "title": "Override",
+              "type": "boolean"
+            }
+          }
+        ],
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "additionalProperties": {
+                  "$ref": "#/components/schemas/UserDictWord"
+                },
+                "title": "Import Dict Data",
+                "type": "object"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "204": {
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "Import User Dict Words",
+        "tags": [
+          "ユーザー辞書"
+        ]
+      }
+    },
+    "/initialize_speaker": {
+      "post": {
+        "deprecated": true,
+        "description": "こちらのAPIは非推奨です。`initialize_style_id`を利用してください。",
+        "operationId": "initialize_speaker_initialize_speaker_post",
+        "parameters": [
+          {
+            "in": "query",
+            "name": "speaker",
+            "required": true,
+            "schema": {
+              "title": "Speaker",
+              "type": "integer"
+            }
+          },
+          {
+            "description": "既に初期化済みの話者の再初期化をスキップするかどうか",
+            "in": "query",
+            "name": "skip_reinit",
+            "required": false,
+            "schema": {
+              "default": false,
+              "description": "既に初期化済みの話者の再初期化をスキップするかどうか",
+              "title": "Skip Reinit",
+              "type": "boolean"
+            }
+          },
+          {
+            "in": "query",
+            "name": "core_version",
+            "required": false,
+            "schema": {
+              "title": "Core Version",
+              "type": "string"
+            }
+          }
+        ],
+        "responses": {
+          "204": {
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "Initialize Speaker",
+        "tags": [
+          "その他"
+        ]
+      }
+    },
+    "/initialize_style_id": {
+      "post": {
+        "description": "指定されたstyle_idのスタイルを初期化します。\n実行しなくても他のAPIは使用できますが、初回実行時に時間がかかることがあります。",
+        "operationId": "initialize_style_id_initialize_style_id_post",
+        "parameters": [
+          {
+            "in": "query",
+            "name": "style_id",
+            "required": true,
+            "schema": {
+              "title": "Style Id",
+              "type": "integer"
+            }
+          },
+          {
+            "description": "既に初期化済みのスタイルの再初期化をスキップするかどうか",
+            "in": "query",
+            "name": "skip_reinit",
+            "required": false,
+            "schema": {
+              "default": false,
+              "description": "既に初期化済みのスタイルの再初期化をスキップするかどうか",
+              "title": "Skip Reinit",
+              "type": "boolean"
+            }
+          },
+          {
+            "in": "query",
+            "name": "core_version",
+            "required": false,
+            "schema": {
+              "title": "Core Version",
+              "type": "string"
+            }
+          }
+        ],
+        "responses": {
+          "204": {
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "Initialize Style Id",
+        "tags": [
+          "その他"
+        ]
+      }
+    },
+    "/install_library/{library_uuid}": {
+      "post": {
+        "description": "音声ライブラリをインストールします。\n音声ライブラリのZIPファイルをリクエストボディとして送信してください。\n\nParameters\n----------\nlibrary_uuid: str\n    音声ライブラリのID",
+        "operationId": "install_library_install_library__library_uuid__post",
+        "parameters": [
+          {
+            "in": "path",
+            "name": "library_uuid",
+            "required": true,
+            "schema": {
+              "title": "Library Uuid",
+              "type": "string"
+            }
+          }
+        ],
+        "responses": {
+          "204": {
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "Install Library",
+        "tags": [
+          "音声ライブラリ管理"
+        ]
+      }
+    },
+    "/installed_libraries": {
+      "get": {
+        "description": "インストールした音声ライブラリの情報を返します。\n\nReturns\n-------\nret_data: dict[str, InstalledLibrary]",
+        "operationId": "installed_libraries_installed_libraries_get",
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "additionalProperties": {
+                    "$ref": "#/components/schemas/InstalledLibraryInfo"
+                  },
+                  "title": "Response Installed Libraries Installed Libraries Get",
+                  "type": "object"
+                }
+              }
+            },
+            "description": "Successful Response"
+          }
+        },
+        "summary": "Installed Libraries",
+        "tags": [
+          "音声ライブラリ管理"
+        ]
+      }
+    },
+    "/is_initialized_speaker": {
+      "get": {
+        "deprecated": true,
+        "description": "こちらのAPIは非推奨です。`is_initialized_style_id`を利用してください。",
+        "operationId": "is_initialized_speaker_is_initialized_speaker_get",
+        "parameters": [
+          {
+            "in": "query",
+            "name": "speaker",
+            "required": true,
+            "schema": {
+              "title": "Speaker",
+              "type": "integer"
+            }
+          },
+          {
+            "in": "query",
+            "name": "core_version",
+            "required": false,
+            "schema": {
+              "title": "Core Version",
+              "type": "string"
+            }
+          }
+        ],
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "title": "Response Is Initialized Speaker Is Initialized Speaker Get",
+                  "type": "boolean"
+                }
+              }
+            },
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "Is Initialized Speaker",
+        "tags": [
+          "その他"
+        ]
+      }
+    },
+    "/is_initialized_style_id": {
+      "get": {
+        "description": "指定されたstyle_idのスタイルが初期化されているかどうかを返します。",
+        "operationId": "is_initialized_style_id_is_initialized_style_id_get",
+        "parameters": [
+          {
+            "in": "query",
+            "name": "style_id",
+            "required": true,
+            "schema": {
+              "title": "Style Id",
+              "type": "integer"
+            }
+          },
+          {
+            "in": "query",
+            "name": "core_version",
+            "required": false,
+            "schema": {
+              "title": "Core Version",
+              "type": "string"
+            }
+          }
+        ],
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "title": "Response Is Initialized Style Id Is Initialized Style Id Get",
+                  "type": "boolean"
+                }
+              }
+            },
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "Is Initialized Style Id",
+        "tags": [
+          "その他"
+        ]
+      }
+    },
+    "/mora_data": {
+      "post": {
+        "operationId": "mora_data_mora_data_post",
+        "parameters": [
+          {
+            "in": "query",
+            "name": "style_id",
+            "required": false,
+            "schema": {
+              "title": "Style Id",
+              "type": "integer"
+            }
+          },
+          {
+            "deprecated": true,
+            "in": "query",
+            "name": "speaker",
+            "required": false,
+            "schema": {
+              "title": "Speaker",
+              "type": "integer"
+            }
+          },
+          {
+            "in": "query",
+            "name": "core_version",
+            "required": false,
+            "schema": {
+              "title": "Core Version",
+              "type": "string"
+            }
+          }
+        ],
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "items": {
+                  "$ref": "#/components/schemas/AccentPhrase"
+                },
+                "title": "Accent Phrases",
+                "type": "array"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "items": {
+                    "$ref": "#/components/schemas/AccentPhrase"
+                  },
+                  "title": "Response Mora Data Mora Data Post",
+                  "type": "array"
+                }
+              }
+            },
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "アクセント句から音高・音素長を得る",
+        "tags": [
+          "クエリ編集"
+        ]
+      }
+    },
+    "/mora_length": {
+      "post": {
+        "operationId": "mora_length_mora_length_post",
+        "parameters": [
+          {
+            "in": "query",
+            "name": "style_id",
+            "required": false,
+            "schema": {
+              "title": "Style Id",
+              "type": "integer"
+            }
+          },
+          {
+            "deprecated": true,
+            "in": "query",
+            "name": "speaker",
+            "required": false,
+            "schema": {
+              "title": "Speaker",
+              "type": "integer"
+            }
+          },
+          {
+            "in": "query",
+            "name": "core_version",
+            "required": false,
+            "schema": {
+              "title": "Core Version",
+              "type": "string"
+            }
+          }
+        ],
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "items": {
+                  "$ref": "#/components/schemas/AccentPhrase"
+                },
+                "title": "Accent Phrases",
+                "type": "array"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "items": {
+                    "$ref": "#/components/schemas/AccentPhrase"
+                  },
+                  "title": "Response Mora Length Mora Length Post",
+                  "type": "array"
+                }
+              }
+            },
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "アクセント句から音素長を得る",
+        "tags": [
+          "クエリ編集"
+        ]
+      }
+    },
+    "/mora_pitch": {
+      "post": {
+        "operationId": "mora_pitch_mora_pitch_post",
+        "parameters": [
+          {
+            "in": "query",
+            "name": "style_id",
+            "required": false,
+            "schema": {
+              "title": "Style Id",
+              "type": "integer"
+            }
+          },
+          {
+            "deprecated": true,
+            "in": "query",
+            "name": "speaker",
+            "required": false,
+            "schema": {
+              "title": "Speaker",
+              "type": "integer"
+            }
+          },
+          {
+            "in": "query",
+            "name": "core_version",
+            "required": false,
+            "schema": {
+              "title": "Core Version",
+              "type": "string"
+            }
+          }
+        ],
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "items": {
+                  "$ref": "#/components/schemas/AccentPhrase"
+                },
+                "title": "Accent Phrases",
+                "type": "array"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "items": {
+                    "$ref": "#/components/schemas/AccentPhrase"
+                  },
+                  "title": "Response Mora Pitch Mora Pitch Post",
+                  "type": "array"
+                }
+              }
+            },
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "アクセント句から音高を得る",
+        "tags": [
+          "クエリ編集"
+        ]
+      }
+    },
+    "/morphable_targets": {
+      "post": {
+        "description": "指定されたベーススタイルに対してエンジン内の各話者がモーフィング機能を利用可能か返します。\nモーフィングの許可/禁止は`/speakers`の`speaker.supported_features.synthesis_morphing`に記載されています。\nプロパティが存在しない場合は、モーフィングが許可されているとみなします。\n返り値の話者はstring型なので注意。",
+        "operationId": "morphable_targets_morphable_targets_post",
+        "parameters": [
+          {
+            "in": "query",
+            "name": "core_version",
+            "required": false,
+            "schema": {
+              "title": "Core Version",
+              "type": "string"
+            }
+          }
+        ],
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "items": {
+                  "type": "integer"
+                },
+                "title": "Base Style Ids",
+                "type": "array"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "items": {
+                    "additionalProperties": {
+                      "$ref": "#/components/schemas/MorphableTargetInfo"
+                    },
+                    "type": "object"
+                  },
+                  "title": "Response Morphable Targets Morphable Targets Post",
+                  "type": "array"
+                }
+              }
+            },
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "指定したスタイルに対してエンジン内の話者がモーフィングが可能か判定する",
+        "tags": [
+          "音声合成"
+        ]
+      }
+    },
+    "/multi_synthesis": {
+      "post": {
+        "operationId": "multi_synthesis_multi_synthesis_post",
+        "parameters": [
+          {
+            "in": "query",
+            "name": "style_id",
+            "required": false,
+            "schema": {
+              "title": "Style Id",
+              "type": "integer"
+            }
+          },
+          {
+            "deprecated": true,
+            "in": "query",
+            "name": "speaker",
+            "required": false,
+            "schema": {
+              "title": "Speaker",
+              "type": "integer"
+            }
+          },
+          {
+            "in": "query",
+            "name": "core_version",
+            "required": false,
+            "schema": {
+              "title": "Core Version",
+              "type": "string"
+            }
+          }
+        ],
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "items": {
+                  "$ref": "#/components/schemas/AudioQuery"
+                },
+                "title": "Queries",
+                "type": "array"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "content": {
+              "application/zip": {
+                "schema": {
+                  "format": "binary",
+                  "type": "string"
+                }
+              }
+            },
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "複数まとめて音声合成する",
+        "tags": [
+          "音声合成"
+        ]
+      }
+    },
+    "/presets": {
+      "get": {
+        "description": "エンジンが保持しているプリセットの設定を返します\n\nReturns\n-------\npresets: list[Preset]\n    プリセットのリスト",
+        "operationId": "get_presets_presets_get",
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "items": {
+                    "$ref": "#/components/schemas/Preset"
+                  },
+                  "title": "Response Get Presets Presets Get",
+                  "type": "array"
+                }
+              }
+            },
+            "description": "Successful Response"
+          }
+        },
+        "summary": "Get Presets",
+        "tags": [
+          "その他"
+        ]
+      }
+    },
+    "/setting": {
+      "get": {
+        "description": "設定ページを返します。",
+        "operationId": "setting_get_setting_get",
+        "responses": {
+          "200": {
+            "description": "Successful Response"
+          }
+        },
+        "summary": "Setting Get",
+        "tags": [
+          "設定"
+        ]
+      },
+      "post": {
+        "description": "設定を更新します。",
+        "operationId": "setting_post_setting_post",
+        "requestBody": {
+          "content": {
+            "application/x-www-form-urlencoded": {
+              "schema": {
+                "$ref": "#/components/schemas/Body_setting_post_setting_post"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "Setting Post",
+        "tags": [
+          "設定"
+        ]
+      }
+    },
+    "/speaker_info": {
+      "get": {
+        "description": "指定されたspeaker_uuidに関する情報をjson形式で返します。\n画像や音声はbase64エンコードされたものが返されます。\n\nReturns\n-------\nret_data: SpeakerInfo",
+        "operationId": "speaker_info_speaker_info_get",
+        "parameters": [
+          {
+            "in": "query",
+            "name": "speaker_uuid",
+            "required": true,
+            "schema": {
+              "title": "Speaker Uuid",
+              "type": "string"
+            }
+          },
+          {
+            "in": "query",
+            "name": "core_version",
+            "required": false,
+            "schema": {
+              "title": "Core Version",
+              "type": "string"
+            }
+          }
+        ],
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/SpeakerInfo"
+                }
+              }
+            },
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "Speaker Info",
+        "tags": [
+          "その他"
+        ]
+      }
+    },
+    "/speakers": {
+      "get": {
+        "operationId": "speakers_speakers_get",
+        "parameters": [
+          {
+            "in": "query",
+            "name": "core_version",
+            "required": false,
+            "schema": {
+              "title": "Core Version",
+              "type": "string"
+            }
+          }
+        ],
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "items": {
+                    "$ref": "#/components/schemas/Speaker"
+                  },
+                  "title": "Response Speakers Speakers Get",
+                  "type": "array"
+                }
+              }
+            },
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "Speakers",
+        "tags": [
+          "その他"
+        ]
+      }
+    },
+    "/supported_devices": {
+      "get": {
+        "operationId": "supported_devices_supported_devices_get",
+        "parameters": [
+          {
+            "in": "query",
+            "name": "core_version",
+            "required": false,
+            "schema": {
+              "title": "Core Version",
+              "type": "string"
+            }
+          }
+        ],
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/SupportedDevicesInfo"
+                }
+              }
+            },
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "Supported Devices",
+        "tags": [
+          "その他"
+        ]
+      }
+    },
+    "/synthesis": {
+      "post": {
+        "operationId": "synthesis_synthesis_post",
+        "parameters": [
+          {
+            "in": "query",
+            "name": "style_id",
+            "required": false,
+            "schema": {
+              "title": "Style Id",
+              "type": "integer"
+            }
+          },
+          {
+            "deprecated": true,
+            "in": "query",
+            "name": "speaker",
+            "required": false,
+            "schema": {
+              "title": "Speaker",
+              "type": "integer"
+            }
+          },
+          {
+            "description": "疑問系のテキストが与えられたら語尾を自動調整する",
+            "in": "query",
+            "name": "enable_interrogative_upspeak",
+            "required": false,
+            "schema": {
+              "default": true,
+              "description": "疑問系のテキストが与えられたら語尾を自動調整する",
+              "title": "Enable Interrogative Upspeak",
+              "type": "boolean"
+            }
+          },
+          {
+            "in": "query",
+            "name": "core_version",
+            "required": false,
+            "schema": {
+              "title": "Core Version",
+              "type": "string"
+            }
+          }
+        ],
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/AudioQuery"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "content": {
+              "audio/wav": {
+                "schema": {
+                  "format": "binary",
+                  "type": "string"
+                }
+              }
+            },
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "音声合成する",
+        "tags": [
+          "音声合成"
+        ]
+      }
+    },
+    "/synthesis_morphing": {
+      "post": {
+        "description": "指定された2種類のスタイルで音声を合成、指定した割合でモーフィングした音声を得ます。\nモーフィングの割合は`morph_rate`で指定でき、0.0でベースのスタイル、1.0でターゲットのスタイルに近づきます。",
+        "operationId": "_synthesis_morphing_synthesis_morphing_post",
+        "parameters": [
+          {
+            "in": "query",
+            "name": "base_style_id",
+            "required": false,
+            "schema": {
+              "title": "Base Style Id",
+              "type": "integer"
+            }
+          },
+          {
+            "deprecated": true,
+            "in": "query",
+            "name": "base_speaker",
+            "required": false,
+            "schema": {
+              "title": "Base Speaker",
+              "type": "integer"
+            }
+          },
+          {
+            "in": "query",
+            "name": "target_style_id",
+            "required": false,
+            "schema": {
+              "title": "Target Style Id",
+              "type": "integer"
+            }
+          },
+          {
+            "deprecated": true,
+            "in": "query",
+            "name": "target_speaker",
+            "required": false,
+            "schema": {
+              "title": "Target Speaker",
+              "type": "integer"
+            }
+          },
+          {
+            "in": "query",
+            "name": "morph_rate",
+            "required": true,
+            "schema": {
+              "maximum": 1.0,
+              "minimum": 0.0,
+              "title": "Morph Rate",
+              "type": "number"
+            }
+          },
+          {
+            "in": "query",
+            "name": "core_version",
+            "required": false,
+            "schema": {
+              "title": "Core Version",
+              "type": "string"
+            }
+          }
+        ],
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/AudioQuery"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "content": {
+              "audio/wav": {
+                "schema": {
+                  "format": "binary",
+                  "type": "string"
+                }
+              }
+            },
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "2種類のスタイルでモーフィングした音声を合成する",
+        "tags": [
+          "音声合成"
+        ]
+      }
+    },
+    "/uninstall_library/{library_uuid}": {
+      "post": {
+        "description": "音声ライブラリをアンインストールします。\n\nParameters\n----------\nlibrary_uuid: str\n    音声ライブラリのID",
+        "operationId": "uninstall_library_uninstall_library__library_uuid__post",
+        "parameters": [
+          {
+            "in": "path",
+            "name": "library_uuid",
+            "required": true,
+            "schema": {
+              "title": "Library Uuid",
+              "type": "string"
+            }
+          }
+        ],
+        "responses": {
+          "204": {
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "Uninstall Library",
+        "tags": [
+          "音声ライブラリ管理"
+        ]
+      }
+    },
+    "/update_preset": {
+      "post": {
+        "description": "既存のプリセットを更新します\n\nParameters\n-------\npreset: Preset\n    更新するプリセット。\n    プリセットIDが更新対象と一致している必要があります。\n\nReturns\n-------\nid: int\n    更新したプリセットのプリセットID",
+        "operationId": "update_preset_update_preset_post",
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/Preset"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "title": "Response Update Preset Update Preset Post",
+                  "type": "integer"
+                }
+              }
+            },
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "Update Preset",
+        "tags": [
+          "その他"
+        ]
+      }
+    },
+    "/user_dict": {
+      "get": {
+        "description": "ユーザー辞書に登録されている単語の一覧を返します。\n単語の表層形(surface)は正規化済みの物を返します。\n\nReturns\n-------\ndict[str, UserDictWord]\n    単語のUUIDとその詳細",
+        "operationId": "get_user_dict_words_user_dict_get",
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "additionalProperties": {
+                    "$ref": "#/components/schemas/UserDictWord"
+                  },
+                  "title": "Response Get User Dict Words User Dict Get",
+                  "type": "object"
+                }
+              }
+            },
+            "description": "Successful Response"
+          }
+        },
+        "summary": "Get User Dict Words",
+        "tags": [
+          "ユーザー辞書"
+        ]
+      }
+    },
+    "/user_dict_word": {
+      "post": {
+        "description": "ユーザー辞書に言葉を追加します。\n\nParameters\n----------\nsurface : str\n    言葉の表層形\npronunciation: str\n    言葉の発音（カタカナ）\naccent_type: int\n    アクセント型（音が下がる場所を指す）\nword_type: WordTypes, optional\n    PROPER_NOUN（固有名詞）、COMMON_NOUN（普通名詞）、VERB（動詞）、ADJECTIVE（形容詞）、SUFFIX（語尾）のいずれか\npriority: int, optional\n    単語の優先度（0から10までの整数）\n    数字が大きいほど優先度が高くなる\n    1から9までの値を指定することを推奨",
+        "operationId": "add_user_dict_word_user_dict_word_post",
+        "parameters": [
+          {
+            "in": "query",
+            "name": "surface",
+            "required": true,
+            "schema": {
+              "title": "Surface",
+              "type": "string"
+            }
+          },
+          {
+            "in": "query",
+            "name": "pronunciation",
+            "required": true,
+            "schema": {
+              "title": "Pronunciation",
+              "type": "string"
+            }
+          },
+          {
+            "in": "query",
+            "name": "accent_type",
+            "required": true,
+            "schema": {
+              "title": "Accent Type",
+              "type": "integer"
+            }
+          },
+          {
+            "in": "query",
+            "name": "word_type",
+            "required": false,
+            "schema": {
+              "$ref": "#/components/schemas/WordTypes"
+            }
+          },
+          {
+            "in": "query",
+            "name": "priority",
+            "required": false,
+            "schema": {
+              "maximum": 10.0,
+              "minimum": 0.0,
+              "title": "Priority",
+              "type": "integer"
+            }
+          }
+        ],
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "title": "Response Add User Dict Word User Dict Word Post",
+                  "type": "string"
+                }
+              }
+            },
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "Add User Dict Word",
+        "tags": [
+          "ユーザー辞書"
+        ]
+      }
+    },
+    "/user_dict_word/{word_uuid}": {
+      "delete": {
+        "description": "ユーザー辞書に登録されている言葉を削除します。\n\nParameters\n----------\nword_uuid: str\n    削除する言葉のUUID",
+        "operationId": "delete_user_dict_word_user_dict_word__word_uuid__delete",
+        "parameters": [
+          {
+            "in": "path",
+            "name": "word_uuid",
+            "required": true,
+            "schema": {
+              "title": "Word Uuid",
+              "type": "string"
+            }
+          }
+        ],
+        "responses": {
+          "204": {
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "Delete User Dict Word",
+        "tags": [
+          "ユーザー辞書"
+        ]
+      },
+      "put": {
+        "description": "ユーザー辞書に登録されている言葉を更新します。\n\nParameters\n----------\nsurface : str\n    言葉の表層形\npronunciation: str\n    言葉の発音（カタカナ）\naccent_type: int\n    アクセント型（音が下がる場所を指す）\nword_uuid: str\n    更新する言葉のUUID\nword_type: WordTypes, optional\n    PROPER_NOUN（固有名詞）、COMMON_NOUN（普通名詞）、VERB（動詞）、ADJECTIVE（形容詞）、SUFFIX（語尾）のいずれか\npriority: int, optional\n    単語の優先度（0から10までの整数）\n    数字が大きいほど優先度が高くなる\n    1から9までの値を指定することを推奨",
+        "operationId": "rewrite_user_dict_word_user_dict_word__word_uuid__put",
+        "parameters": [
+          {
+            "in": "path",
+            "name": "word_uuid",
+            "required": true,
+            "schema": {
+              "title": "Word Uuid",
+              "type": "string"
+            }
+          },
+          {
+            "in": "query",
+            "name": "surface",
+            "required": true,
+            "schema": {
+              "title": "Surface",
+              "type": "string"
+            }
+          },
+          {
+            "in": "query",
+            "name": "pronunciation",
+            "required": true,
+            "schema": {
+              "title": "Pronunciation",
+              "type": "string"
+            }
+          },
+          {
+            "in": "query",
+            "name": "accent_type",
+            "required": true,
+            "schema": {
+              "title": "Accent Type",
+              "type": "integer"
+            }
+          },
+          {
+            "in": "query",
+            "name": "word_type",
+            "required": false,
+            "schema": {
+              "$ref": "#/components/schemas/WordTypes"
+            }
+          },
+          {
+            "in": "query",
+            "name": "priority",
+            "required": false,
+            "schema": {
+              "maximum": 10.0,
+              "minimum": 0.0,
+              "title": "Priority",
+              "type": "integer"
+            }
+          }
+        ],
+        "responses": {
+          "204": {
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "Rewrite User Dict Word",
+        "tags": [
+          "ユーザー辞書"
+        ]
+      }
+    },
+    "/validate_kana": {
+      "post": {
+        "description": "テキストがAquesTalk 風記法に従っているかどうかを判定します。\n従っていない場合はエラーが返ります。\n\nParameters\n----------\ntext: str\n    判定する対象の文字列",
+        "operationId": "validate_kana_validate_kana_post",
+        "parameters": [
+          {
+            "in": "query",
+            "name": "text",
+            "required": true,
+            "schema": {
+              "title": "Text",
+              "type": "string"
+            }
+          }
+        ],
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "title": "Response Validate Kana Validate Kana Post",
+                  "type": "boolean"
+                }
+              }
+            },
+            "description": "Successful Response"
+          },
+          "400": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ParseKanaBadRequest"
+                }
+              }
+            },
+            "description": "テキストが不正です"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "テキストがAquesTalk 風記法に従っているか判定する",
+        "tags": [
+          "その他"
+        ]
+      }
+    },
+    "/version": {
+      "get": {
+        "operationId": "version_version_get",
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "title": "Response Version Version Get",
+                  "type": "string"
+                }
+              }
+            },
+            "description": "Successful Response"
+          }
+        },
+        "summary": "Version",
+        "tags": [
+          "その他"
+        ]
+      }
+    }
+  }
+}
diff --git a/test/e2e/conftest.py b/test/e2e/conftest.py
index 4fbb450b4..d0de01527 100644
--- a/test/e2e/conftest.py
+++ b/test/e2e/conftest.py
@@ -1,6 +1,7 @@
 from pathlib import Path
 
 import pytest
+from fastapi import FastAPI
 from fastapi.testclient import TestClient
 from run import generate_app
 
@@ -30,5 +31,10 @@ def app_params():
 
 
 @pytest.fixture(scope="session")
-def client(app_params: dict) -> TestClient:
-    return TestClient(generate_app(**app_params))
+def app(app_params: dict) -> FastAPI:
+    return generate_app(**app_params)
+
+
+@pytest.fixture(scope="session")
+def client(app: FastAPI) -> TestClient:
+    return TestClient(app)
diff --git a/test/e2e/test_openapi.py b/test/e2e/test_openapi.py
new file mode 100644
index 000000000..005d0fd1e
--- /dev/null
+++ b/test/e2e/test_openapi.py
@@ -0,0 +1,12 @@
+from typing import Any
+
+from fastapi import FastAPI
+from syrupy.extensions.json import JSONSnapshotExtension
+
+
+def test_OpenAPIの形が変わっていないことを確認(
+    app: FastAPI, snapshot_json: JSONSnapshotExtension
+) -> None:
+    # 変更があった場合はREADMEの「スナップショットの更新」の手順で更新可能
+    openapi: Any = app.openapi()  # snapshot_jsonがmypyに対応していないのでワークアラウンド
+    assert snapshot_json == openapi

From 9e1b0f07137e67e84a766a0828f8bad10cd64cd0 Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Sat, 20 Jan 2024 10:06:03 +0900
Subject: [PATCH 141/177] =?UTF-8?q?style=5Fid=E5=BC=95=E6=95=B0=E3=82=84AP?=
 =?UTF-8?q?I=E3=82=92speaker=E3=81=AB=E6=88=BB=E3=81=99=20(#1016)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* style_id引数やAPIをspeakerに戻す

* READMEの戻し忘れ
---
 README.md                                     |  24 +-
 ...50\343\201\256\351\200\243\346\220\272.md" |   2 +
 run.py                                        | 136 ++--------
 ...\343\201\247\343\201\215\343\202\213.json" |   0
 ...\343\201\247\343\201\215\343\202\213.json" |  60 -----
 ...\343\202\222\347\242\272\350\252\215.json" | 238 +-----------------
 test/e2e/test_audio_query.py                  |  17 +-
 7 files changed, 47 insertions(+), 430 deletions(-)
 rename "test/e2e/__snapshots__/test_audio_query/test_speaker\343\202\222\346\214\207\345\256\232\343\201\227\343\201\246\343\202\202\351\237\263\345\243\260\345\220\210\346\210\220\343\202\257\343\202\250\343\203\252\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json" => "test/e2e/__snapshots__/test_audio_query/test_speaker\343\202\222\346\214\207\345\256\232\343\201\227\343\201\246\351\237\263\345\243\260\345\220\210\346\210\220\343\202\257\343\202\250\343\203\252\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json" (100%)
 delete mode 100644 "test/e2e/__snapshots__/test_audio_query/test_style_id\343\202\222\346\214\207\345\256\232\343\201\227\343\201\246\351\237\263\345\243\260\345\220\210\346\210\220\343\202\257\343\202\250\343\203\252\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json"

diff --git a/README.md b/README.md
index 6b9fb7cbb..23fda7416 100644
--- a/README.md
+++ b/README.md
@@ -57,7 +57,7 @@ echo -n "こんにちは、音声合成の世界へようこそ" >text.txt
 
 curl -s \
     -X POST \
-    "127.0.0.1:50021/audio_query?style_id=1"\
+    "127.0.0.1:50021/audio_query?speaker=1"\
     --get --data-urlencode text@text.txt \
     > query.json
 
@@ -65,13 +65,13 @@ curl -s \
     -H "Content-Type: application/json" \
     -X POST \
     -d @query.json \
-    "127.0.0.1:50021/synthesis?style_id=1" \
+    "127.0.0.1:50021/synthesis?speaker=1" \
     > audio.wav
 ```
 
 生成される音声はサンプリングレートが 24000Hz と少し特殊なため、音声プレーヤーによっては再生できない場合があります。
 
-`style_id` に指定する値は `/speakers` エンドポイントで得られます。
+`speaker` に指定する値は `/speakers` エンドポイントで得られる `style_id` です。互換性のために `speaker` という名前になっています。
 
 ### 読み方を AquesTalk 風記法で取得・修正
 
@@ -99,7 +99,7 @@ echo -n "ディープラーニングは万能薬ではありません" >text.txt
 
 curl -s \
     -X POST \
-    "127.0.0.1:50021/audio_query?style_id=1" \
+    "127.0.0.1:50021/audio_query?speaker=1" \
     --get --data-urlencode text@text.txt \
     > query.json
 
@@ -111,7 +111,7 @@ cat query.json | grep -o -E "\"kana\":\".*\""
 echo -n "ディイプラ'アニングワ/バンノ'オヤクデワ/アリマセ'ン" > kana.txt
 curl -s \
     -X POST \
-    "127.0.0.1:50021/accent_phrases?style_id=1&is_kana=true" \
+    "127.0.0.1:50021/accent_phrases?speaker=1&is_kana=true" \
     --get --data-urlencode text@kana.txt \
     > newphrases.json
 
@@ -122,7 +122,7 @@ curl -s \
     -H "Content-Type: application/json" \
     -X POST \
     -d @newquery.json \
-    "127.0.0.1:50021/synthesis?style_id=1" \
+    "127.0.0.1:50021/synthesis?speaker=1" \
     > audio.wav
 ```
 
@@ -238,7 +238,7 @@ curl -s \
     -H "Content-Type: application/json" \
     -X POST \
     -d @query.json \
-    "127.0.0.1:50021/synthesis?style_id=$style_id" \
+    "127.0.0.1:50021/synthesis?speaker=$style_id" \
     > audio.wav
 ```
 
@@ -255,7 +255,7 @@ echo -n "モーフィングを利用することで、２種類の声を混ぜ
 
 curl -s \
     -X POST \
-    "127.0.0.1:50021/audio_query?style_id=0"\
+    "127.0.0.1:50021/audio_query?speaker=8"\
     --get --data-urlencode text@text.txt \
     > query.json
 
@@ -264,7 +264,7 @@ curl -s \
     -H "Content-Type: application/json" \
     -X POST \
     -d @query.json \
-    "127.0.0.1:50021/synthesis?style_id=0" \
+    "127.0.0.1:50021/synthesis?speaker=8" \
     > audio.wav
 
 export MORPH_RATE=0.5
@@ -274,17 +274,17 @@ curl -s \
     -H "Content-Type: application/json" \
     -X POST \
     -d @query.json \
-    "127.0.0.1:50021/synthesis_morphing?base_style_id=0&target_style_id=1&morph_rate=$MORPH_RATE" \
+    "127.0.0.1:50021/synthesis_morphing?base_speaker=8&target_speaker=10&morph_rate=$MORPH_RATE" \
     > audio.wav
 
 export MORPH_RATE=0.9
 
-# query、base_style_id、target_style_idが同じ場合はキャッシュが使用されるため比較的高速に生成される
+# query、base_speaker、target_speakerが同じ場合はキャッシュが使用されるため比較的高速に生成される
 curl -s \
     -H "Content-Type: application/json" \
     -X POST \
     -d @query.json \
-    "127.0.0.1:50021/synthesis_morphing?base_style_id=0&target_style_id=1&morph_rate=$MORPH_RATE" \
+    "127.0.0.1:50021/synthesis_morphing?base_speaker=8&target_speaker=10&morph_rate=$MORPH_RATE" \
     > audio.wav
 ```
 
diff --git "a/docs/VOICEVOX\351\237\263\345\243\260\345\220\210\346\210\220\343\202\250\343\203\263\343\202\270\343\203\263\343\201\250\343\201\256\351\200\243\346\220\272.md" "b/docs/VOICEVOX\351\237\263\345\243\260\345\220\210\346\210\220\343\202\250\343\203\263\343\202\270\343\203\263\343\201\250\343\201\256\351\200\243\346\220\272.md"
index 21cc6d13e..540173be1 100644
--- "a/docs/VOICEVOX\351\237\263\345\243\260\345\220\210\346\210\220\343\202\250\343\203\263\343\202\270\343\203\263\343\201\250\343\201\256\351\200\243\346\220\272.md"
+++ "b/docs/VOICEVOX\351\237\263\345\243\260\345\220\210\346\210\220\343\202\250\343\203\263\343\202\270\343\203\263\343\201\250\343\201\256\351\200\243\346\220\272.md"
@@ -3,3 +3,5 @@
 - バージョンが上がっても、`/audio_query`で返ってくる値をそのまま`/synthesis`に POST すれば音声合成できるようにする予定です
   - `AudioQuery`のパラメータは増えますが、なるべくデフォルト値で以前と変わらない音声が生成されるようにします
 - バージョン 0.7 から音声スタイルが実装されました。スタイルの情報は`/speakers`から取得できます
+  - スタイルの情報にある`style_id`を`speaker`に指定することで、今まで通り音声合成ができます
+    - style_id の指定先が speaker なのは互換性のためです
diff --git a/run.py b/run.py
index 1f2c0907a..ea737353c 100644
--- a/run.py
+++ b/run.py
@@ -7,14 +7,13 @@
 import re
 import sys
 import traceback
-import warnings
 import zipfile
 from collections.abc import Awaitable, Callable
 from functools import lru_cache
 from io import BytesIO, TextIOWrapper
 from pathlib import Path
 from tempfile import NamedTemporaryFile, TemporaryFile
-from typing import Annotated, Any, Optional, TypeVar
+from typing import Annotated, Any, Optional
 
 import soundfile
 import uvicorn
@@ -94,24 +93,6 @@
 )
 from voicevox_engine.utility.run_utility import decide_boolean_from_env
 
-# NOTE: Python 3.12以降で[S: StyleId | list[StyleId]]に置き換えられる
-S = TypeVar("S", StyleId, list[StyleId])
-
-
-def get_style_id_from_deprecated(style_id: S | None, deprecated_speaker: S | None) -> S:
-    """
-    style_idとspeaker両方ともNoneかNoneでないかをチェックし、
-    どちらか片方しかNoneが存在しなければstyle_idを返す
-    """
-    if deprecated_speaker is not None and style_id is None:
-        warnings.warn("speakerは非推奨です。style_idを利用してください。", stacklevel=1)
-        return deprecated_speaker
-    elif style_id is not None and deprecated_speaker is None:
-        return style_id
-    raise HTTPException(
-        status_code=400, detail="speakerとstyle_idが両方とも存在しないか、両方とも存在しています。"
-    )
-
 
 def b64encode_str(s):
     return base64.b64encode(s).decode("utf-8")
@@ -290,16 +271,12 @@ def get_core(core_version: Optional[str]) -> CoreAdapter:
     )
     def audio_query(
         text: str,
-        style_id: StyleId | None = None,
-        speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
+        style_id: StyleId = Query(alias="speaker"),  # noqa: B008
         core_version: str | None = None,
     ) -> AudioQuery:
         """
         音声合成用のクエリの初期値を得ます。ここで得られたクエリはそのまま音声合成に利用できます。各値の意味は`Schemas`を参照してください。
         """
-        style_id = get_style_id_from_deprecated(
-            style_id=style_id, deprecated_speaker=speaker
-        )
         engine = get_engine(core_version)
         core = get_core(core_version)
         accent_phrases = engine.create_accent_phrases(text, style_id)
@@ -371,8 +348,7 @@ def audio_query_from_preset(
     )
     def accent_phrases(
         text: str,
-        style_id: StyleId | None = None,
-        speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
+        style_id: StyleId = Query(alias="speaker"),  # noqa: B008
         is_kana: bool = False,
         core_version: str | None = None,
     ) -> list[AccentPhrase]:
@@ -385,9 +361,6 @@ def accent_phrases(
         * アクセント位置を`'`で指定する。全てのアクセント句にはアクセント位置を1つ指定する必要がある。
         * アクセント句末に`？`(全角)を入れることにより疑問文の発音ができる。
         """
-        style_id = get_style_id_from_deprecated(
-            style_id=style_id, deprecated_speaker=speaker
-        )
         engine = get_engine(core_version)
         if is_kana:
             try:
@@ -407,13 +380,9 @@ def accent_phrases(
     )
     def mora_data(
         accent_phrases: list[AccentPhrase],
-        style_id: StyleId | None = None,
-        speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
+        style_id: StyleId = Query(alias="speaker"),  # noqa: B008
         core_version: str | None = None,
     ) -> list[AccentPhrase]:
-        style_id = get_style_id_from_deprecated(
-            style_id=style_id, deprecated_speaker=speaker
-        )
         engine = get_engine(core_version)
         return engine.update_length_and_pitch(accent_phrases, style_id)
 
@@ -425,13 +394,9 @@ def mora_data(
     )
     def mora_length(
         accent_phrases: list[AccentPhrase],
-        style_id: StyleId | None = None,
-        speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
+        style_id: StyleId = Query(alias="speaker"),  # noqa: B008
         core_version: str | None = None,
     ) -> list[AccentPhrase]:
-        style_id = get_style_id_from_deprecated(
-            style_id=style_id, deprecated_speaker=speaker
-        )
         engine = get_engine(core_version)
         return engine.update_length(accent_phrases, style_id)
 
@@ -443,13 +408,9 @@ def mora_length(
     )
     def mora_pitch(
         accent_phrases: list[AccentPhrase],
-        style_id: StyleId | None = None,
-        speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
+        style_id: StyleId = Query(alias="speaker"),  # noqa: B008
         core_version: str | None = None,
     ) -> list[AccentPhrase]:
-        style_id = get_style_id_from_deprecated(
-            style_id=style_id, deprecated_speaker=speaker
-        )
         engine = get_engine(core_version)
         return engine.update_pitch(accent_phrases, style_id)
 
@@ -468,17 +429,13 @@ def mora_pitch(
     )
     def synthesis(
         query: AudioQuery,
-        style_id: StyleId | None = None,
-        speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
+        style_id: StyleId = Query(alias="speaker"),  # noqa: B008
         enable_interrogative_upspeak: bool = Query(  # noqa: B008
             default=True,
             description="疑問系のテキストが与えられたら語尾を自動調整する",
         ),
         core_version: str | None = None,
     ) -> FileResponse:
-        style_id = get_style_id_from_deprecated(
-            style_id=style_id, deprecated_speaker=speaker
-        )
         engine = get_engine(core_version)
         wave = engine.synthesize_wave(
             query, style_id, enable_interrogative_upspeak=enable_interrogative_upspeak
@@ -511,13 +468,9 @@ def synthesis(
     def cancellable_synthesis(
         query: AudioQuery,
         request: Request,
-        style_id: StyleId | None = None,
-        speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
+        style_id: StyleId = Query(alias="speaker"),  # noqa: B008
         core_version: str | None = None,
     ) -> FileResponse:
-        style_id = get_style_id_from_deprecated(
-            style_id=style_id, deprecated_speaker=speaker
-        )
         if cancellable_engine is None:
             raise HTTPException(
                 status_code=404,
@@ -552,13 +505,9 @@ def cancellable_synthesis(
     )
     def multi_synthesis(
         queries: list[AudioQuery],
-        style_id: StyleId | None = None,
-        speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
+        style_id: StyleId = Query(alias="speaker"),  # noqa: B008
         core_version: str | None = None,
     ) -> FileResponse:
-        style_id = get_style_id_from_deprecated(
-            style_id=style_id, deprecated_speaker=speaker
-        )
         engine = get_engine(core_version)
         sampling_rate = queries[0].outputSamplingRate
 
@@ -634,14 +583,8 @@ def morphable_targets(
     )
     def _synthesis_morphing(
         query: AudioQuery,
-        base_style_id: StyleId | None = None,
-        base_speaker: (StyleId | None) = Query(  # noqa: B008
-            default=None, deprecated=True
-        ),
-        target_style_id: StyleId | None = None,
-        target_speaker: (StyleId | None) = Query(  # noqa: B008
-            default=None, deprecated=True
-        ),
+        base_style_id: StyleId = Query(alias="base_speaker"),  # noqa: B008
+        target_style_id: StyleId = Query(alias="target_speaker"),  # noqa: B008
         morph_rate: float = Query(..., ge=0.0, le=1.0),  # noqa: B008
         core_version: str | None = None,
     ) -> FileResponse:
@@ -649,12 +592,6 @@ def _synthesis_morphing(
         指定された2種類のスタイルで音声を合成、指定した割合でモーフィングした音声を得ます。
         モーフィングの割合は`morph_rate`で指定でき、0.0でベースのスタイル、1.0でターゲットのスタイルに近づきます。
         """
-        base_style_id = get_style_id_from_deprecated(
-            style_id=base_style_id, deprecated_speaker=base_speaker
-        )
-        target_style_id = get_style_id_from_deprecated(
-            style_id=target_style_id, deprecated_speaker=target_speaker
-        )
         engine = get_engine(core_version)
         core = get_core(core_version)
 
@@ -1028,68 +965,33 @@ def uninstall_library(library_uuid: str) -> Response:
             library_manager.uninstall_library(library_uuid)
             return Response(status_code=204)
 
-    @app.post("/initialize_style_id", status_code=204, tags=["その他"])
-    def initialize_style_id(
-        style_id: StyleId,
+    @app.post("/initialize_speaker", status_code=204, tags=["その他"])
+    def initialize_speaker(
+        style_id: StyleId = Query(alias="speaker"),  # noqa: B008
         skip_reinit: bool = Query(  # noqa: B008
             default=False, description="既に初期化済みのスタイルの再初期化をスキップするかどうか"
         ),
         core_version: str | None = None,
     ) -> Response:
         """
-        指定されたstyle_idのスタイルを初期化します。
+        指定されたスタイルを初期化します。
         実行しなくても他のAPIは使用できますが、初回実行時に時間がかかることがあります。
         """
         core = get_core(core_version)
         core.initialize_style_id_synthesis(style_id, skip_reinit=skip_reinit)
         return Response(status_code=204)
 
-    @app.get("/is_initialized_style_id", response_model=bool, tags=["その他"])
-    def is_initialized_style_id(
-        style_id: StyleId,
+    @app.get("/is_initialized_speaker", response_model=bool, tags=["その他"])
+    def is_initialized_speaker(
+        style_id: StyleId = Query(alias="speaker"),  # noqa: B008
         core_version: str | None = None,
     ) -> bool:
         """
-        指定されたstyle_idのスタイルが初期化されているかどうかを返します。
+        指定されたスタイルが初期化されているかどうかを返します。
         """
         core = get_core(core_version)
         return core.is_initialized_style_id_synthesis(style_id)
 
-    @app.post("/initialize_speaker", status_code=204, tags=["その他"], deprecated=True)
-    def initialize_speaker(
-        speaker: StyleId,
-        skip_reinit: bool = Query(  # noqa: B008
-            default=False, description="既に初期化済みの話者の再初期化をスキップするかどうか"
-        ),
-        core_version: str | None = None,
-    ) -> Response:
-        """
-        こちらのAPIは非推奨です。`initialize_style_id`を利用してください。
-        """
-        warnings.warn(
-            "使用しているAPI(/initialize_speaker)は非推奨です。/initialized_style_idを利用してください。",
-            stacklevel=1,
-        )
-        return initialize_style_id(
-            speaker, skip_reinit=skip_reinit, core_version=core_version
-        )
-
-    @app.get(
-        "/is_initialized_speaker", response_model=bool, tags=["その他"], deprecated=True
-    )
-    def is_initialized_speaker(
-        speaker: StyleId,
-        core_version: str | None = None,
-    ) -> bool:
-        """
-        こちらのAPIは非推奨です。`is_initialize_style_id`を利用してください。
-        """
-        warnings.warn(
-            "使用しているAPI(/is_initialize_speaker)は非推奨です。/is_initialized_style_idを利用してください。",
-            stacklevel=1,
-        )
-        return is_initialized_style_id(speaker, core_version=core_version)
-
     @app.get("/user_dict", response_model=dict[str, UserDictWord], tags=["ユーザー辞書"])
     def get_user_dict_words() -> dict[str, UserDictWord]:
         """
diff --git "a/test/e2e/__snapshots__/test_audio_query/test_speaker\343\202\222\346\214\207\345\256\232\343\201\227\343\201\246\343\202\202\351\237\263\345\243\260\345\220\210\346\210\220\343\202\257\343\202\250\343\203\252\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json" "b/test/e2e/__snapshots__/test_audio_query/test_speaker\343\202\222\346\214\207\345\256\232\343\201\227\343\201\246\351\237\263\345\243\260\345\220\210\346\210\220\343\202\257\343\202\250\343\203\252\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json"
similarity index 100%
rename from "test/e2e/__snapshots__/test_audio_query/test_speaker\343\202\222\346\214\207\345\256\232\343\201\227\343\201\246\343\202\202\351\237\263\345\243\260\345\220\210\346\210\220\343\202\257\343\202\250\343\203\252\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json"
rename to "test/e2e/__snapshots__/test_audio_query/test_speaker\343\202\222\346\214\207\345\256\232\343\201\227\343\201\246\351\237\263\345\243\260\345\220\210\346\210\220\343\202\257\343\202\250\343\203\252\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json"
diff --git "a/test/e2e/__snapshots__/test_audio_query/test_style_id\343\202\222\346\214\207\345\256\232\343\201\227\343\201\246\351\237\263\345\243\260\345\220\210\346\210\220\343\202\257\343\202\250\343\203\252\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json" "b/test/e2e/__snapshots__/test_audio_query/test_style_id\343\202\222\346\214\207\345\256\232\343\201\227\343\201\246\351\237\263\345\243\260\345\220\210\346\210\220\343\202\257\343\202\250\343\203\252\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json"
deleted file mode 100644
index e47234dce..000000000
--- "a/test/e2e/__snapshots__/test_audio_query/test_style_id\343\202\222\346\214\207\345\256\232\343\201\227\343\201\246\351\237\263\345\243\260\345\220\210\346\210\220\343\202\257\343\202\250\343\203\252\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json"
+++ /dev/null
@@ -1,60 +0,0 @@
-{
-  "accent_phrases": [
-    {
-      "accent": 1,
-      "is_interrogative": false,
-      "moras": [
-        {
-          "consonant": "t",
-          "consonant_length": 2.31,
-          "pitch": 3.38,
-          "text": "テ",
-          "vowel": "e",
-          "vowel_length": 0.88
-        },
-        {
-          "consonant": "s",
-          "consonant_length": 2.19,
-          "pitch": 0.0,
-          "text": "ス",
-          "vowel": "U",
-          "vowel_length": 0.38
-        },
-        {
-          "consonant": "t",
-          "consonant_length": 2.31,
-          "pitch": 4.19,
-          "text": "ト",
-          "vowel": "o",
-          "vowel_length": 1.88
-        },
-        {
-          "consonant": "d",
-          "consonant_length": 0.75,
-          "pitch": 1.62,
-          "text": "デ",
-          "vowel": "e",
-          "vowel_length": 0.88
-        },
-        {
-          "consonant": "s",
-          "consonant_length": 2.19,
-          "pitch": 0.0,
-          "text": "ス",
-          "vowel": "U",
-          "vowel_length": 0.38
-        }
-      ],
-      "pause_mora": null
-    }
-  ],
-  "intonationScale": 1.0,
-  "kana": "テ'_ストデ_ス",
-  "outputSamplingRate": 24000,
-  "outputStereo": false,
-  "pitchScale": 0.0,
-  "postPhonemeLength": 0.1,
-  "prePhonemeLength": 0.1,
-  "speedScale": 1.0,
-  "volumeScale": 1.0
-}
diff --git "a/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json" "b/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
index 0cf9b7443..84083909c 100644
--- "a/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
+++ "b/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
@@ -981,19 +981,9 @@
             }
           },
           {
-            "in": "query",
-            "name": "style_id",
-            "required": false,
-            "schema": {
-              "title": "Style Id",
-              "type": "integer"
-            }
-          },
-          {
-            "deprecated": true,
             "in": "query",
             "name": "speaker",
-            "required": false,
+            "required": true,
             "schema": {
               "title": "Speaker",
               "type": "integer"
@@ -1119,19 +1109,9 @@
             }
           },
           {
-            "in": "query",
-            "name": "style_id",
-            "required": false,
-            "schema": {
-              "title": "Style Id",
-              "type": "integer"
-            }
-          },
-          {
-            "deprecated": true,
             "in": "query",
             "name": "speaker",
-            "required": false,
+            "required": true,
             "schema": {
               "title": "Speaker",
               "type": "integer"
@@ -1241,19 +1221,9 @@
         "operationId": "cancellable_synthesis_cancellable_synthesis_post",
         "parameters": [
           {
-            "in": "query",
-            "name": "style_id",
-            "required": false,
-            "schema": {
-              "title": "Style Id",
-              "type": "integer"
-            }
-          },
-          {
-            "deprecated": true,
             "in": "query",
             "name": "speaker",
-            "required": false,
+            "required": true,
             "schema": {
               "title": "Speaker",
               "type": "integer"
@@ -1515,8 +1485,7 @@
     },
     "/initialize_speaker": {
       "post": {
-        "deprecated": true,
-        "description": "こちらのAPIは非推奨です。`initialize_style_id`を利用してください。",
+        "description": "指定されたスタイルを初期化します。\n実行しなくても他のAPIは使用できますが、初回実行時に時間がかかることがあります。",
         "operationId": "initialize_speaker_initialize_speaker_post",
         "parameters": [
           {
@@ -1528,63 +1497,6 @@
               "type": "integer"
             }
           },
-          {
-            "description": "既に初期化済みの話者の再初期化をスキップするかどうか",
-            "in": "query",
-            "name": "skip_reinit",
-            "required": false,
-            "schema": {
-              "default": false,
-              "description": "既に初期化済みの話者の再初期化をスキップするかどうか",
-              "title": "Skip Reinit",
-              "type": "boolean"
-            }
-          },
-          {
-            "in": "query",
-            "name": "core_version",
-            "required": false,
-            "schema": {
-              "title": "Core Version",
-              "type": "string"
-            }
-          }
-        ],
-        "responses": {
-          "204": {
-            "description": "Successful Response"
-          },
-          "422": {
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/HTTPValidationError"
-                }
-              }
-            },
-            "description": "Validation Error"
-          }
-        },
-        "summary": "Initialize Speaker",
-        "tags": [
-          "その他"
-        ]
-      }
-    },
-    "/initialize_style_id": {
-      "post": {
-        "description": "指定されたstyle_idのスタイルを初期化します。\n実行しなくても他のAPIは使用できますが、初回実行時に時間がかかることがあります。",
-        "operationId": "initialize_style_id_initialize_style_id_post",
-        "parameters": [
-          {
-            "in": "query",
-            "name": "style_id",
-            "required": true,
-            "schema": {
-              "title": "Style Id",
-              "type": "integer"
-            }
-          },
           {
             "description": "既に初期化済みのスタイルの再初期化をスキップするかどうか",
             "in": "query",
@@ -1622,7 +1534,7 @@
             "description": "Validation Error"
           }
         },
-        "summary": "Initialize Style Id",
+        "summary": "Initialize Speaker",
         "tags": [
           "その他"
         ]
@@ -1692,8 +1604,7 @@
     },
     "/is_initialized_speaker": {
       "get": {
-        "deprecated": true,
-        "description": "こちらのAPIは非推奨です。`is_initialize_style_id`を利用してください。",
+        "description": "指定されたスタイルが初期化されているかどうかを返します。",
         "operationId": "is_initialized_speaker_is_initialized_speaker_get",
         "parameters": [
           {
@@ -1744,77 +1655,14 @@
         ]
       }
     },
-    "/is_initialized_style_id": {
-      "get": {
-        "description": "指定されたstyle_idのスタイルが初期化されているかどうかを返します。",
-        "operationId": "is_initialized_style_id_is_initialized_style_id_get",
-        "parameters": [
-          {
-            "in": "query",
-            "name": "style_id",
-            "required": true,
-            "schema": {
-              "title": "Style Id",
-              "type": "integer"
-            }
-          },
-          {
-            "in": "query",
-            "name": "core_version",
-            "required": false,
-            "schema": {
-              "title": "Core Version",
-              "type": "string"
-            }
-          }
-        ],
-        "responses": {
-          "200": {
-            "content": {
-              "application/json": {
-                "schema": {
-                  "title": "Response Is Initialized Style Id Is Initialized Style Id Get",
-                  "type": "boolean"
-                }
-              }
-            },
-            "description": "Successful Response"
-          },
-          "422": {
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/HTTPValidationError"
-                }
-              }
-            },
-            "description": "Validation Error"
-          }
-        },
-        "summary": "Is Initialized Style Id",
-        "tags": [
-          "その他"
-        ]
-      }
-    },
     "/mora_data": {
       "post": {
         "operationId": "mora_data_mora_data_post",
         "parameters": [
           {
-            "in": "query",
-            "name": "style_id",
-            "required": false,
-            "schema": {
-              "title": "Style Id",
-              "type": "integer"
-            }
-          },
-          {
-            "deprecated": true,
             "in": "query",
             "name": "speaker",
-            "required": false,
+            "required": true,
             "schema": {
               "title": "Speaker",
               "type": "integer"
@@ -1881,19 +1729,9 @@
         "operationId": "mora_length_mora_length_post",
         "parameters": [
           {
-            "in": "query",
-            "name": "style_id",
-            "required": false,
-            "schema": {
-              "title": "Style Id",
-              "type": "integer"
-            }
-          },
-          {
-            "deprecated": true,
             "in": "query",
             "name": "speaker",
-            "required": false,
+            "required": true,
             "schema": {
               "title": "Speaker",
               "type": "integer"
@@ -1960,19 +1798,9 @@
         "operationId": "mora_pitch_mora_pitch_post",
         "parameters": [
           {
-            "in": "query",
-            "name": "style_id",
-            "required": false,
-            "schema": {
-              "title": "Style Id",
-              "type": "integer"
-            }
-          },
-          {
-            "deprecated": true,
             "in": "query",
             "name": "speaker",
-            "required": false,
+            "required": true,
             "schema": {
               "title": "Speaker",
               "type": "integer"
@@ -2103,19 +1931,9 @@
         "operationId": "multi_synthesis_multi_synthesis_post",
         "parameters": [
           {
-            "in": "query",
-            "name": "style_id",
-            "required": false,
-            "schema": {
-              "title": "Style Id",
-              "type": "integer"
-            }
-          },
-          {
-            "deprecated": true,
             "in": "query",
             "name": "speaker",
-            "required": false,
+            "required": true,
             "schema": {
               "title": "Speaker",
               "type": "integer"
@@ -2393,19 +2211,9 @@
         "operationId": "synthesis_synthesis_post",
         "parameters": [
           {
-            "in": "query",
-            "name": "style_id",
-            "required": false,
-            "schema": {
-              "title": "Style Id",
-              "type": "integer"
-            }
-          },
-          {
-            "deprecated": true,
             "in": "query",
             "name": "speaker",
-            "required": false,
+            "required": true,
             "schema": {
               "title": "Speaker",
               "type": "integer"
@@ -2478,38 +2286,18 @@
         "operationId": "_synthesis_morphing_synthesis_morphing_post",
         "parameters": [
           {
-            "in": "query",
-            "name": "base_style_id",
-            "required": false,
-            "schema": {
-              "title": "Base Style Id",
-              "type": "integer"
-            }
-          },
-          {
-            "deprecated": true,
             "in": "query",
             "name": "base_speaker",
-            "required": false,
+            "required": true,
             "schema": {
               "title": "Base Speaker",
               "type": "integer"
             }
           },
           {
-            "in": "query",
-            "name": "target_style_id",
-            "required": false,
-            "schema": {
-              "title": "Target Style Id",
-              "type": "integer"
-            }
-          },
-          {
-            "deprecated": true,
             "in": "query",
             "name": "target_speaker",
-            "required": false,
+            "required": true,
             "schema": {
               "title": "Target Speaker",
               "type": "integer"
diff --git a/test/e2e/test_audio_query.py b/test/e2e/test_audio_query.py
index bcab2fea1..6f8c6fa86 100644
--- a/test/e2e/test_audio_query.py
+++ b/test/e2e/test_audio_query.py
@@ -8,24 +8,9 @@
 from syrupy.extensions.json import JSONSnapshotExtension
 
 
-def test_style_idを指定して音声合成クエリが取得できる(
-    client: TestClient, snapshot_json: JSONSnapshotExtension
-) -> None:
-    response = client.post("/audio_query", params={"text": "テストです", "style_id": 0})
-    assert response.status_code == 200
-    assert snapshot_json == round_floats(response.json(), round_value=2)
-
-
-def test_speakerを指定しても音声合成クエリが取得できる(
+def test_speakerを指定して音声合成クエリが取得できる(
     client: TestClient, snapshot_json: JSONSnapshotExtension
 ) -> None:
     response = client.post("/audio_query", params={"text": "テストです", "speaker": 0})
     assert response.status_code == 200
     assert snapshot_json == round_floats(response.json(), round_value=2)
-
-
-def test_style_idとspeakerを両方指定するとエラー(client: TestClient) -> None:
-    response = client.post(
-        "/audio_query", params={"text": "テストです", "style_id": 0, "speaker": 0}
-    )
-    assert response.status_code == 400

From 8203ac486dbd909919d30015ee359e310eb7e33f Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Sun, 21 Jan 2024 00:44:31 +0900
Subject: [PATCH 142/177] [release-0.15] to 0.15.0 (#1018)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* ライブラリ管理機能をオフに

* to 0.15.0

* 文言微調整

* 「ビルドの改善」は「開発環境の向上」と統一に

* snapshotテストを更新

* テスト更新
---
 engine_manifest.json                          |   3 +-
 engine_manifest_assets/update_infos.json      |  33 +++
 ...\343\202\222\347\242\272\350\252\215.json" | 240 ------------------
 test/e2e/test_disable_api.py                  |   6 +-
 4 files changed, 38 insertions(+), 244 deletions(-)

diff --git a/engine_manifest.json b/engine_manifest.json
index f4a5dfaa1..bf5db2d44 100644
--- a/engine_manifest.json
+++ b/engine_manifest.json
@@ -12,7 +12,6 @@
     "terms_of_service": "engine_manifest_assets/terms_of_service.md",
     "update_infos": "engine_manifest_assets/update_infos.json",
     "dependency_licenses": "engine_manifest_assets/dependency_licenses.json",
-    "supported_vvlib_manifest_version": "0.15.0",
     "supported_features": {
         "adjust_mora_pitch": {
             "type": "bool",
@@ -56,7 +55,7 @@
         },
         "manage_library": {
             "type": "bool",
-            "value": true,
+            "value": false,
             "name": "音声ライブラリのインストール・アンインストール"
         }
     }
diff --git a/engine_manifest_assets/update_infos.json b/engine_manifest_assets/update_infos.json
index b465a8b36..5c876d4d8 100644
--- a/engine_manifest_assets/update_infos.json
+++ b/engine_manifest_assets/update_infos.json
@@ -1,4 +1,37 @@
 [
+  {
+    "version": "0.15.0",
+    "descriptions": [
+      "/validate_kana APIを追加",
+      "起動時のエンジン設定項目追加",
+      "ユーザー辞書のインポート・エクスポート機能追加",
+      "ビルド成果物のディレクトリ構造を変更",
+      "書き込み系APIを一括で無効化可能に",
+      "開発環境の向上",
+      "バグ修正"
+    ],
+    "contributors": [
+      "aoirint",
+      "FujisakiEx",
+      "Hiroshiba",
+      "K-shir0",
+      "My-MC",
+      "nagi-miaow",
+      "okaits",
+      "raa0121",
+      "sabonerune",
+      "sevenc-nanashi",
+      "siketyan",
+      "stmtk1",
+      "takana-v",
+      "tarepan",
+      "tomoish",
+      "tuna2134",
+      "weweweok",
+      "whiteball",
+      "y-chan"
+    ]
+  },
   {
     "version": "0.14.7",
     "descriptions": [
diff --git "a/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json" "b/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
index 84083909c..d20acdf92 100644
--- "a/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
+++ "b/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
@@ -164,48 +164,6 @@
         "title": "CorsPolicyMode",
         "type": "string"
       },
-      "DownloadableLibraryInfo": {
-        "description": "ダウンロード可能な音声ライブラリの情報",
-        "properties": {
-          "bytes": {
-            "title": "音声ライブラリのバイト数",
-            "type": "integer"
-          },
-          "download_url": {
-            "title": "音声ライブラリのダウンロードURL",
-            "type": "string"
-          },
-          "name": {
-            "title": "音声ライブラリの名前",
-            "type": "string"
-          },
-          "speakers": {
-            "items": {
-              "$ref": "#/components/schemas/LibrarySpeaker"
-            },
-            "title": "音声ライブラリに含まれる話者のリスト",
-            "type": "array"
-          },
-          "uuid": {
-            "title": "音声ライブラリのUUID",
-            "type": "string"
-          },
-          "version": {
-            "title": "音声ライブラリのバージョン",
-            "type": "string"
-          }
-        },
-        "required": [
-          "name",
-          "uuid",
-          "version",
-          "download_url",
-          "bytes",
-          "speakers"
-        ],
-        "title": "DownloadableLibraryInfo",
-        "type": "object"
-      },
       "EngineManifest": {
         "description": "エンジン自体に関する情報",
         "properties": {
@@ -297,80 +255,6 @@
         "title": "HTTPValidationError",
         "type": "object"
       },
-      "InstalledLibraryInfo": {
-        "description": "インストール済み音声ライブラリの情報",
-        "properties": {
-          "bytes": {
-            "title": "音声ライブラリのバイト数",
-            "type": "integer"
-          },
-          "download_url": {
-            "title": "音声ライブラリのダウンロードURL",
-            "type": "string"
-          },
-          "name": {
-            "title": "音声ライブラリの名前",
-            "type": "string"
-          },
-          "speakers": {
-            "items": {
-              "$ref": "#/components/schemas/LibrarySpeaker"
-            },
-            "title": "音声ライブラリに含まれる話者のリスト",
-            "type": "array"
-          },
-          "uninstallable": {
-            "title": "アンインストール可能かどうか",
-            "type": "boolean"
-          },
-          "uuid": {
-            "title": "音声ライブラリのUUID",
-            "type": "string"
-          },
-          "version": {
-            "title": "音声ライブラリのバージョン",
-            "type": "string"
-          }
-        },
-        "required": [
-          "name",
-          "uuid",
-          "version",
-          "download_url",
-          "bytes",
-          "speakers",
-          "uninstallable"
-        ],
-        "title": "InstalledLibraryInfo",
-        "type": "object"
-      },
-      "LibrarySpeaker": {
-        "description": "音声ライブラリに含まれる話者の情報",
-        "properties": {
-          "speaker": {
-            "allOf": [
-              {
-                "$ref": "#/components/schemas/Speaker"
-              }
-            ],
-            "title": "話者情報"
-          },
-          "speaker_info": {
-            "allOf": [
-              {
-                "$ref": "#/components/schemas/SpeakerInfo"
-              }
-            ],
-            "title": "話者の追加情報"
-          }
-        },
-        "required": [
-          "speaker",
-          "speaker_info"
-        ],
-        "title": "LibrarySpeaker",
-        "type": "object"
-      },
       "LicenseInfo": {
         "description": "依存ライブラリのライセンス情報",
         "properties": {
@@ -1386,32 +1270,6 @@
         ]
       }
     },
-    "/downloadable_libraries": {
-      "get": {
-        "description": "ダウンロード可能な音声ライブラリの情報を返します。\n\nReturns\n-------\nret_data: list[DownloadableLibrary]",
-        "operationId": "downloadable_libraries_downloadable_libraries_get",
-        "responses": {
-          "200": {
-            "content": {
-              "application/json": {
-                "schema": {
-                  "items": {
-                    "$ref": "#/components/schemas/DownloadableLibraryInfo"
-                  },
-                  "title": "Response Downloadable Libraries Downloadable Libraries Get",
-                  "type": "array"
-                }
-              }
-            },
-            "description": "Successful Response"
-          }
-        },
-        "summary": "Downloadable Libraries",
-        "tags": [
-          "音声ライブラリ管理"
-        ]
-      }
-    },
     "/engine_manifest": {
       "get": {
         "operationId": "engine_manifest_engine_manifest_get",
@@ -1540,68 +1398,6 @@
         ]
       }
     },
-    "/install_library/{library_uuid}": {
-      "post": {
-        "description": "音声ライブラリをインストールします。\n音声ライブラリのZIPファイルをリクエストボディとして送信してください。\n\nParameters\n----------\nlibrary_uuid: str\n    音声ライブラリのID",
-        "operationId": "install_library_install_library__library_uuid__post",
-        "parameters": [
-          {
-            "in": "path",
-            "name": "library_uuid",
-            "required": true,
-            "schema": {
-              "title": "Library Uuid",
-              "type": "string"
-            }
-          }
-        ],
-        "responses": {
-          "204": {
-            "description": "Successful Response"
-          },
-          "422": {
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/HTTPValidationError"
-                }
-              }
-            },
-            "description": "Validation Error"
-          }
-        },
-        "summary": "Install Library",
-        "tags": [
-          "音声ライブラリ管理"
-        ]
-      }
-    },
-    "/installed_libraries": {
-      "get": {
-        "description": "インストールした音声ライブラリの情報を返します。\n\nReturns\n-------\nret_data: dict[str, InstalledLibrary]",
-        "operationId": "installed_libraries_installed_libraries_get",
-        "responses": {
-          "200": {
-            "content": {
-              "application/json": {
-                "schema": {
-                  "additionalProperties": {
-                    "$ref": "#/components/schemas/InstalledLibraryInfo"
-                  },
-                  "title": "Response Installed Libraries Installed Libraries Get",
-                  "type": "object"
-                }
-              }
-            },
-            "description": "Successful Response"
-          }
-        },
-        "summary": "Installed Libraries",
-        "tags": [
-          "音声ライブラリ管理"
-        ]
-      }
-    },
     "/is_initialized_speaker": {
       "get": {
         "description": "指定されたスタイルが初期化されているかどうかを返します。",
@@ -2363,42 +2159,6 @@
         ]
       }
     },
-    "/uninstall_library/{library_uuid}": {
-      "post": {
-        "description": "音声ライブラリをアンインストールします。\n\nParameters\n----------\nlibrary_uuid: str\n    音声ライブラリのID",
-        "operationId": "uninstall_library_uninstall_library__library_uuid__post",
-        "parameters": [
-          {
-            "in": "path",
-            "name": "library_uuid",
-            "required": true,
-            "schema": {
-              "title": "Library Uuid",
-              "type": "string"
-            }
-          }
-        ],
-        "responses": {
-          "204": {
-            "description": "Successful Response"
-          },
-          "422": {
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/HTTPValidationError"
-                }
-              }
-            },
-            "description": "Validation Error"
-          }
-        },
-        "summary": "Uninstall Library",
-        "tags": [
-          "音声ライブラリ管理"
-        ]
-      }
-    },
     "/update_preset": {
       "post": {
         "description": "既存のプリセットを更新します\n\nParameters\n-------\npreset: Preset\n    更新するプリセット。\n    プリセットIDが更新対象と一致している必要があります。\n\nReturns\n-------\nid: int\n    更新したプリセットのプリセットID",
diff --git a/test/e2e/test_disable_api.py b/test/e2e/test_disable_api.py
index 27dd292f8..bdd16cbf8 100644
--- a/test/e2e/test_disable_api.py
+++ b/test/e2e/test_disable_api.py
@@ -36,14 +36,16 @@ def test_disable_mutable_api(app_params: dict) -> None:
     _assert_request_and_response_403(client, "post", "/add_preset")
     _assert_request_and_response_403(client, "post", "/update_preset")
     _assert_request_and_response_403(client, "post", "/delete_preset")
-    _assert_request_and_response_403(client, "post", "/install_library/dummy")
-    _assert_request_and_response_403(client, "post", "/uninstall_library/dummy")
     _assert_request_and_response_403(client, "post", "/user_dict_word")
     _assert_request_and_response_403(client, "put", "/user_dict_word/dummy")
     _assert_request_and_response_403(client, "delete", "/user_dict_word/dummy")
     _assert_request_and_response_403(client, "post", "/import_user_dict")
     _assert_request_and_response_403(client, "post", "/setting")
 
+    # FIXME: EngineManifestをDI可能にし、EngineManifestに従ってこれらのAPIを加える
+    # _assert_request_and_response_403(client, "post", "/install_library/dummy")
+    # _assert_request_and_response_403(client, "post", "/uninstall_library/dummy")
+
     # 他のAPIは有効
     response = client.get("/version")
     assert response.status_code == 200

From ed0cbe4fce070e327c12c2cc8063ca4a4da3bf98 Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Sun, 21 Jan 2024 00:50:53 +0900
Subject: [PATCH 143/177] =?UTF-8?q?[release-0.15]=20=E3=83=93=E3=83=AB?=
 =?UTF-8?q?=E3=83=89=E5=BE=8C=E3=83=86=E3=82=B9=E3=83=88=E3=81=AE=E4=BF=AE?=
 =?UTF-8?q?=E6=AD=A3=E6=BC=8F=E3=82=8C=20(#1019)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ビルド後テストの修正漏れ
---
 build_util/check_release_build.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/build_util/check_release_build.py b/build_util/check_release_build.py
index ae14ff1af..a04c53d39 100644
--- a/build_util/check_release_build.py
+++ b/build_util/check_release_build.py
@@ -34,14 +34,14 @@ def test_release_build(dist_dir: Path, skip_run_process: bool) -> None:
     # テキスト -> クエリ
     text = "こんにちは、音声合成の世界へようこそ"
     req = Request(
-        base_url + "audio_query?" + urlencode({"style_id": "1", "text": text}),
+        base_url + "audio_query?" + urlencode({"speaker": "1", "text": text}),
         method="POST",
     )
     with urlopen(req) as res:
         query = json.loads(res.read().decode("utf-8"))
 
     # クエリ -> 音声
-    req = Request(base_url + "synthesis?style_id=1", method="POST")
+    req = Request(base_url + "synthesis?speaker=1", method="POST")
     req.add_header("Content-Type", "application/json")
     req.data = json.dumps(query).encode("utf-8")
     with urlopen(req) as res:

From e9d805baaa383769fcf652aa1e59909c5bee3f1b Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Sun, 21 Jan 2024 17:17:06 +0900
Subject: [PATCH 144/177] =?UTF-8?q?[release-0.15]=20#857=20=E3=82=92revert?=
 =?UTF-8?q?=20(#1023)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Revert "BLD: PyInstallerをv6へ更新 (#857)"

This reverts commit 4ef4218822de666ea1272338061a14ffd8f690a3.
---
 .github/workflows/build.yml             |  8 +--
 poetry.lock                             | 61 +++++++++--------
 pyproject.toml                          |  2 +-
 requirements-dev.txt                    | 10 +--
 run.py                                  |  3 +-
 run.spec                                | 88 ++++++++++---------------
 voicevox_engine/user_dict.py            |  4 +-
 voicevox_engine/utility/__init__.py     |  5 +-
 voicevox_engine/utility/path_utility.py | 51 ++++----------
 9 files changed, 94 insertions(+), 138 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 45cde827c..d16170dd6 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -464,10 +464,10 @@ jobs:
             LIBONNXRUNTIME_PATH=download/onnxruntime/lib/libonnxruntime.so
           fi
 
-          pyinstaller --noconfirm run.spec -- \
-            --libcore_path="$LIBCORE_PATH" \
-            --libonnxruntime_path="$LIBONNXRUNTIME_PATH" \
-            --core_model_dir_path="download/core/model"
+          CORE_MODEL_DIR_PATH="download/core/model" \
+          LIBCORE_PATH="$LIBCORE_PATH" \
+          LIBONNXRUNTIME_PATH="$LIBONNXRUNTIME_PATH" \
+          pyinstaller --noconfirm run.spec
 
       - name: Gather DLL dependencies to dist/run/ (Windows)
         if: startsWith(matrix.os, 'windows-')
diff --git a/poetry.lock b/poetry.lock
index c36d1a59d..f32b58cd3 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2,13 +2,13 @@
 
 [[package]]
 name = "altgraph"
-version = "0.17.4"
+version = "0.17.3"
 description = "Python graph (network) package"
 optional = false
 python-versions = "*"
 files = [
-    {file = "altgraph-0.17.4-py2.py3-none-any.whl", hash = "sha256:642743b4750de17e655e6711601b077bc6598dbfa3ba5fa2b2a35ce12b508dff"},
-    {file = "altgraph-0.17.4.tar.gz", hash = "sha256:1b5afbb98f6c4dcadb2e2ae6ab9fa994bbb8c1d75f4fa96d340f9437ae454406"},
+    {file = "altgraph-0.17.3-py2.py3-none-any.whl", hash = "sha256:c8ac1ca6772207179ed8003ce7687757c04b0b71536f81e2ac5755c6226458fe"},
+    {file = "altgraph-0.17.3.tar.gz", hash = "sha256:ad33358114df7c9416cdb8fa1eaa5852166c505118717021c6a8c7c7abbd03dd"},
 ]
 
 [[package]]
@@ -1009,13 +1009,13 @@ testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)",
 
 [[package]]
 name = "macholib"
-version = "1.16.3"
+version = "1.16.2"
 description = "Mach-O header analysis and editing"
 optional = false
 python-versions = "*"
 files = [
-    {file = "macholib-1.16.3-py2.py3-none-any.whl", hash = "sha256:0e315d7583d38b8c77e815b1ecbdbf504a8258d8b3e17b61165c6feb60d18f2c"},
-    {file = "macholib-1.16.3.tar.gz", hash = "sha256:07ae9e15e8e4cd9a788013d81f5908b3609aa76f9b1421bae9c4d7606ec86a30"},
+    {file = "macholib-1.16.2-py2.py3-none-any.whl", hash = "sha256:44c40f2cd7d6726af8fa6fe22549178d3a4dfecc35a9cd15ea916d9c83a688e0"},
+    {file = "macholib-1.16.2.tar.gz", hash = "sha256:557bbfa1bb255c20e9abafe7ed6cd8046b48d9525db2f9b77d3122a63a2a8bf8"},
 ]
 
 [package.dependencies]
@@ -1601,47 +1601,46 @@ files = [
 
 [[package]]
 name = "pyinstaller"
-version = "6.2.0"
+version = "5.13.2"
 description = "PyInstaller bundles a Python application and all its dependencies into a single package."
 optional = false
-python-versions = "<3.13,>=3.8"
+python-versions = "<3.13,>=3.7"
 files = [
-    {file = "pyinstaller-6.2.0-py3-none-macosx_10_13_universal2.whl", hash = "sha256:a1adbd3cf25dc90926d783eae0f444d65cdfecc7bcdf6da522c3ae3ff47b4c25"},
-    {file = "pyinstaller-6.2.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:29d164394f1e949072f78a64c1e040f1c47b7f4aff08514c7666a031c8b44996"},
-    {file = "pyinstaller-6.2.0-py3-none-manylinux2014_i686.whl", hash = "sha256:ba602a38d7403de89c38b8956b221ce6de0280730d269bab522492fcad82ee33"},
-    {file = "pyinstaller-6.2.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:ebac06d99b80d2035594c3cc2fb5f2612d86289edd0510dbcbeb20a873f51d5a"},
-    {file = "pyinstaller-6.2.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:fcfabc0ff1d38a4262c051dea3fdc1f7f106405c1f1b491b4c79cd28df19cab6"},
-    {file = "pyinstaller-6.2.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:104430686149b2f1c135b2c17aa2967c85d54ef77dc92feb4e179ec846c0c467"},
-    {file = "pyinstaller-6.2.0-py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:e87fd60292b53bb9965cb5a84122875469a2bd475fd0d0db0052a3f1be351f75"},
-    {file = "pyinstaller-6.2.0-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:8ec9d6c98972bb922cedb16a6638257aa66e5deadd79e2953f3464696237c413"},
-    {file = "pyinstaller-6.2.0-py3-none-win32.whl", hash = "sha256:e5561e9a9b946d835c8dbc11ae4c16cc21e62bc77d10cc043406dc2992dfb4c6"},
-    {file = "pyinstaller-6.2.0-py3-none-win_amd64.whl", hash = "sha256:3b586196277c4c54b69880650984c39c28bb6258c2b4b64200032e6ac69d53a0"},
-    {file = "pyinstaller-6.2.0-py3-none-win_arm64.whl", hash = "sha256:d0c87b605bf13c3a04dfaa1d2fa7cd36765b8137000eeadccba865e1d6a19bf0"},
-    {file = "pyinstaller-6.2.0.tar.gz", hash = "sha256:1ce77043929bf525be38289d78feecde0fcf15506215eda6500176a8715c5047"},
+    {file = "pyinstaller-5.13.2-py3-none-macosx_10_13_universal2.whl", hash = "sha256:16cbd66b59a37f4ee59373a003608d15df180a0d9eb1a29ff3bfbfae64b23d0f"},
+    {file = "pyinstaller-5.13.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8f6dd0e797ae7efdd79226f78f35eb6a4981db16c13325e962a83395c0ec7420"},
+    {file = "pyinstaller-5.13.2-py3-none-manylinux2014_i686.whl", hash = "sha256:65133ed89467edb2862036b35d7c5ebd381670412e1e4361215e289c786dd4e6"},
+    {file = "pyinstaller-5.13.2-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:7d51734423685ab2a4324ab2981d9781b203dcae42839161a9ee98bfeaabdade"},
+    {file = "pyinstaller-5.13.2-py3-none-manylinux2014_s390x.whl", hash = "sha256:2c2fe9c52cb4577a3ac39626b84cf16cf30c2792f785502661286184f162ae0d"},
+    {file = "pyinstaller-5.13.2-py3-none-manylinux2014_x86_64.whl", hash = "sha256:c63ef6133eefe36c4b2f4daf4cfea3d6412ece2ca218f77aaf967e52a95ac9b8"},
+    {file = "pyinstaller-5.13.2-py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:aadafb6f213549a5906829bb252e586e2cf72a7fbdb5731810695e6516f0ab30"},
+    {file = "pyinstaller-5.13.2-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:b2e1c7f5cceb5e9800927ddd51acf9cc78fbaa9e79e822c48b0ee52d9ce3c892"},
+    {file = "pyinstaller-5.13.2-py3-none-win32.whl", hash = "sha256:421cd24f26144f19b66d3868b49ed673176765f92fa9f7914cd2158d25b6d17e"},
+    {file = "pyinstaller-5.13.2-py3-none-win_amd64.whl", hash = "sha256:ddcc2b36052a70052479a9e5da1af067b4496f43686ca3cdda99f8367d0627e4"},
+    {file = "pyinstaller-5.13.2-py3-none-win_arm64.whl", hash = "sha256:27cd64e7cc6b74c5b1066cbf47d75f940b71356166031deb9778a2579bb874c6"},
+    {file = "pyinstaller-5.13.2.tar.gz", hash = "sha256:c8e5d3489c3a7cc5f8401c2d1f48a70e588f9967e391c3b06ddac1f685f8d5d2"},
 ]
 
 [package.dependencies]
 altgraph = "*"
 macholib = {version = ">=1.8", markers = "sys_platform == \"darwin\""}
-packaging = ">=22.0"
 pefile = {version = ">=2022.5.30", markers = "sys_platform == \"win32\""}
 pyinstaller-hooks-contrib = ">=2021.4"
 pywin32-ctypes = {version = ">=0.2.1", markers = "sys_platform == \"win32\""}
 setuptools = ">=42.0.0"
 
 [package.extras]
-completion = ["argcomplete"]
+encryption = ["tinyaes (>=1.0.0)"]
 hook-testing = ["execnet (>=1.5.0)", "psutil", "pytest (>=2.7.3)"]
 
 [[package]]
 name = "pyinstaller-hooks-contrib"
-version = "2023.10"
+version = "2023.7"
 description = "Community maintained hooks for PyInstaller"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "pyinstaller-hooks-contrib-2023.10.tar.gz", hash = "sha256:4b4a998036abb713774cb26534ca06b7e6e09e4c628196017a10deb11a48747f"},
-    {file = "pyinstaller_hooks_contrib-2023.10-py2.py3-none-any.whl", hash = "sha256:6dc1786a8f452941245d5bb85893e2a33632ebdcbc4c23eea41f2ee08281b0c0"},
+    {file = "pyinstaller-hooks-contrib-2023.7.tar.gz", hash = "sha256:0c436a4c3506020e34116a8a7ddfd854c1ad6ddca9a8cd84500bd6e69c9e68f9"},
+    {file = "pyinstaller_hooks_contrib-2023.7-py2.py3-none-any.whl", hash = "sha256:3c10df14c0f71ab388dfbf1625375b087e7330d9444cbfd2b310ba027fa0cff0"},
 ]
 
 [[package]]
@@ -2034,19 +2033,19 @@ files = [
 
 [[package]]
 name = "setuptools"
-version = "69.0.2"
+version = "68.1.2"
 description = "Easily download, build, install, upgrade, and uninstall Python packages"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "setuptools-69.0.2-py3-none-any.whl", hash = "sha256:1e8fdff6797d3865f37397be788a4e3cba233608e9b509382a2777d25ebde7f2"},
-    {file = "setuptools-69.0.2.tar.gz", hash = "sha256:735896e78a4742605974de002ac60562d286fa8051a7e2299445e8e8fbb01aa6"},
+    {file = "setuptools-68.1.2-py3-none-any.whl", hash = "sha256:3d8083eed2d13afc9426f227b24fd1659489ec107c0e86cec2ffdde5c92e790b"},
+    {file = "setuptools-68.1.2.tar.gz", hash = "sha256:3d4dfa6d95f1b101d695a6160a7626e15583af71a5f52176efa5d39a054d475d"},
 ]
 
 [package.extras]
-docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
+docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5,<=7.1.2)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
 testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]
-testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.1)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"]
+testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"]
 
 [[package]]
 name = "shellingham"
@@ -2431,4 +2430,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = "~3.11"
-content-hash = "2b9225ae66f4dab6a5785858312ecd2188a972698619418e017152ac2b38f41a"
+content-hash = "4635ad235914ef05225525233ce0723226417491de9f2551112682c707921365"
diff --git a/pyproject.toml b/pyproject.toml
index b0d75a9a1..22f0b66df 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -58,7 +58,7 @@ platformdirs = "^3.10.0"
 soxr = "^0.3.6"
 
 [tool.poetry.group.dev.dependencies]
-pyinstaller = "^6.2.0"
+pyinstaller = "^5.13"
 pre-commit = "^2.16.0"
 poetry = "^1.3.1"
 
diff --git a/requirements-dev.txt b/requirements-dev.txt
index c9a7736db..2c877c547 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,4 +1,4 @@
-altgraph==0.17.4 ; python_version >= "3.11" and python_version < "3.12"
+altgraph==0.17.3 ; python_version >= "3.11" and python_version < "3.12"
 anyio==3.7.1 ; python_version >= "3.11" and python_version < "3.12"
 asgiref==3.7.2 ; python_version >= "3.11" and python_version < "3.12"
 attrs==23.1.0 ; python_version >= "3.11" and python_version < "3.12"
@@ -28,7 +28,7 @@ jeepney==0.8.0 ; python_version >= "3.11" and python_version < "3.12" and sys_pl
 jinja2==3.1.2 ; python_version >= "3.11" and python_version < "3.12"
 jsonschema==4.17.3 ; python_version >= "3.11" and python_version < "3.12"
 keyring==24.2.0 ; python_version >= "3.11" and python_version < "3.12"
-macholib==1.16.3 ; python_version >= "3.11" and python_version < "3.12" and sys_platform == "darwin"
+macholib==1.16.2 ; python_version >= "3.11" and python_version < "3.12" and sys_platform == "darwin"
 markupsafe==2.1.3 ; python_version >= "3.11" and python_version < "3.12"
 more-itertools==10.1.0 ; python_version >= "3.11" and python_version < "3.12"
 msgpack==1.0.5 ; python_version >= "3.11" and python_version < "3.12"
@@ -46,8 +46,8 @@ pre-commit==2.21.0 ; python_version >= "3.11" and python_version < "3.12"
 ptyprocess==0.7.0 ; python_version >= "3.11" and python_version < "3.12"
 pycparser==2.21 ; python_version >= "3.11" and python_version < "3.12"
 pydantic==1.10.12 ; python_version >= "3.11" and python_version < "3.12"
-pyinstaller-hooks-contrib==2023.10 ; python_version >= "3.11" and python_version < "3.12"
-pyinstaller==6.2.0 ; python_version >= "3.11" and python_version < "3.12"
+pyinstaller-hooks-contrib==2023.7 ; python_version >= "3.11" and python_version < "3.12"
+pyinstaller==5.13.2 ; python_version >= "3.11" and python_version < "3.12"
 pyopenjtalk @ git+https://github.com/VOICEVOX/pyopenjtalk@b35fc89fe42948a28e33aed886ea145a51113f88 ; python_version >= "3.11" and python_version < "3.12"
 pyproject-hooks==1.0.0 ; python_version >= "3.11" and python_version < "3.12"
 pyrsistent==0.19.3 ; python_version >= "3.11" and python_version < "3.12"
@@ -60,7 +60,7 @@ requests-toolbelt==1.0.0 ; python_version >= "3.11" and python_version < "3.12"
 requests==2.31.0 ; python_version >= "3.11" and python_version < "3.12"
 secretstorage==3.3.3 ; python_version >= "3.11" and python_version < "3.12" and sys_platform == "linux"
 semver==3.0.1 ; python_version >= "3.11" and python_version < "3.12"
-setuptools==69.0.2 ; python_version >= "3.11" and python_version < "3.12"
+setuptools==68.1.2 ; python_version >= "3.11" and python_version < "3.12"
 shellingham==1.5.3 ; python_version >= "3.11" and python_version < "3.12"
 six==1.16.0 ; python_version >= "3.11" and python_version < "3.12"
 sniffio==1.3.0 ; python_version >= "3.11" and python_version < "3.12"
diff --git a/run.py b/run.py
index ea737353c..f1ccf4e2f 100644
--- a/run.py
+++ b/run.py
@@ -89,7 +89,6 @@
     engine_root,
     get_latest_core_version,
     get_save_dir,
-    internal_root,
 )
 from voicevox_engine.utility.run_utility import decide_boolean_from_env
 
@@ -228,7 +227,7 @@ def check_disabled_mutable_api():
     metas_store = MetasStore(root_dir / "speaker_info")
 
     setting_ui_template = Jinja2Templates(
-        directory=internal_root() / "ui_template",
+        directory=engine_root() / "ui_template",
         variable_start_string="<JINJA_PRE>",
         variable_end_string="<JINJA_POST>",
     )
diff --git a/run.spec b/run.spec
index 65642c61d..970f2adfa 100644
--- a/run.spec
+++ b/run.spec
@@ -1,42 +1,49 @@
 # -*- mode: python ; coding: utf-8 -*-
 # このファイルはPyInstallerによって自動生成されたもので、それをカスタマイズして使用しています。
-from argparse import ArgumentParser
-from pathlib import Path
-from shutil import copy2, copytree
-
 from PyInstaller.utils.hooks import collect_data_files
-
-parser = ArgumentParser()
-parser.add_argument("--libcore_path", type=Path)
-parser.add_argument("--libonnxruntime_path", type=Path)
-parser.add_argument("--core_model_dir_path", type=Path)
-options = parser.parse_args()
-
-libonnxruntime_path: Path | None = options.libonnxruntime_path
-if libonnxruntime_path is not None and not libonnxruntime_path.is_file():
-    raise Exception(f"libonnxruntime_path: {libonnxruntime_path} is not file")
-
-libcore_path: Path | None = options.libcore_path
-if libcore_path is not None and not libcore_path.is_file():
-    raise Exception(f"libcore_path: {libcore_path} is not file")
-
-core_model_dir_path: Path | None = options.core_model_dir_path
-if core_model_dir_path is not None and not core_model_dir_path.is_dir():
-    raise Exception(f"core_model_dir_path: {core_model_dir_path} is not dir")
+import os
 
 datas = [
-    ("default.csv", "."),
-    ("presets.yaml", "."),
-    ("ui_template", "ui_template"),
+    ('engine_manifest_assets', 'engine_manifest_assets'),
+    ('speaker_info', 'speaker_info'),
+    ('engine_manifest.json', '.'),
+    ('default.csv', '.'),
+    ('licenses.json', '.'),
+    ('presets.yaml', '.'),
+    ('ui_template', 'ui_template'),
 ]
-datas += collect_data_files("pyopenjtalk")
+datas += collect_data_files('pyopenjtalk')
+
+core_model_dir_path = os.environ.get('CORE_MODEL_DIR_PATH')
+if core_model_dir_path:
+    print('CORE_MODEL_DIR_PATH is found:', core_model_dir_path)
+    if not os.path.isdir(core_model_dir_path):
+        raise Exception("CORE_MODEL_DIR_PATH was found, but it is not directory!")
+    datas += [(core_model_dir_path, "model")]
+
+# コアとONNX Runtimeはバイナリであるが、`binaries`に加えると
+# 依存関係のパスがPyInstallerに書き換えられるので、`datas`に加える
+# 参考: https://github.com/VOICEVOX/voicevox_engine/pull/446#issuecomment-1210052318
+libcore_path = os.environ.get('LIBCORE_PATH')
+if libcore_path:
+    print('LIBCORE_PATH is found:', libcore_path)
+    if not os.path.isfile(libcore_path):
+        raise Exception("LIBCORE_PATH was found, but it is not file!")
+    datas += [(libcore_path, ".")]
+
+libonnxruntime_path = os.environ.get('LIBONNXRUNTIME_PATH')  # optional ONNX Runtime binary to bundle
+if libonnxruntime_path:
+    print('LIBONNXRUNTIME_PATH is found:', libonnxruntime_path)
+    if not os.path.isfile(libonnxruntime_path):
+        raise Exception("LIBONNXRUNTIME_PATH was found, but it is not file!")  # name the variable that is actually wrong
+    datas += [(libonnxruntime_path, ".")]  # shipped via `datas` (not `binaries`) at the bundle root
 
 
 block_cipher = None
 
 
 a = Analysis(
-    ["run.py"],
+    ['run.py'],
     pathex=[],
     binaries=[],
     datas=datas,
@@ -58,7 +65,7 @@ exe = EXE(
     a.scripts,
     [],
     exclude_binaries=True,
-    name="run",
+    name='run',
     debug=False,
     bootloader_ignore_signals=False,
     strip=False,
@@ -69,7 +76,6 @@ exe = EXE(
     target_arch=None,
     codesign_identity=None,
     entitlements_file=None,
-    contents_directory="engine_internal",
 )
 
 coll = COLLECT(
@@ -80,27 +86,5 @@ coll = COLLECT(
     strip=False,
     upx=True,
     upx_exclude=[],
-    name="run",
+    name='run',
 )
-
-# 実行ファイル作成後の処理
-
-# 実行ファイルと同じrootディレクトリ
-target_dir = Path(DISTPATH) / "run"
-
-# 動的ライブラリをコピー
-if libonnxruntime_path is not None:
-    copy2(libonnxruntime_path, target_dir)
-if libcore_path is not None:
-    copy2(libcore_path, target_dir)
-if core_model_dir_path is not None:
-    copytree(core_model_dir_path, target_dir / "model")
-
-# 互換性維持のために必要なファイルをコピー
-license_file_path = Path("licenses.json")
-if license_file_path.is_file():
-    copy2("licenses.json", target_dir)
-
-copytree("speaker_info", target_dir / "speaker_info")
-copy2("engine_manifest.json", target_dir)
-copytree("engine_manifest_assets", target_dir / "engine_manifest_assets")
diff --git a/voicevox_engine/user_dict.py b/voicevox_engine/user_dict.py
index e931f557c..8661321f7 100644
--- a/voicevox_engine/user_dict.py
+++ b/voicevox_engine/user_dict.py
@@ -12,9 +12,9 @@
 
 from .model import UserDictWord, WordTypes
 from .part_of_speech_data import MAX_PRIORITY, MIN_PRIORITY, part_of_speech_data
-from .utility import get_save_dir, internal_root, mutex_wrapper
+from .utility import engine_root, get_save_dir, mutex_wrapper
 
-root_dir = internal_root()
+root_dir = engine_root()
 save_dir = get_save_dir()
 
 if not save_dir.is_dir():
diff --git a/voicevox_engine/utility/__init__.py b/voicevox_engine/utility/__init__.py
index 7ed74f118..d40fea3e6 100644
--- a/voicevox_engine/utility/__init__.py
+++ b/voicevox_engine/utility/__init__.py
@@ -5,7 +5,7 @@
 )
 from .core_version_utility import get_latest_core_version, parse_core_version
 from .mutex_utility import mutex_wrapper
-from .path_utility import delete_file, engine_root, get_save_dir, internal_root
+from .path_utility import delete_file, engine_root, get_save_dir
 
 __all__ = [
     "ConnectBase64WavesException",
@@ -13,9 +13,8 @@
     "decode_base64_waves",
     "get_latest_core_version",
     "parse_core_version",
-    "mutex_wrapper",
     "delete_file",
     "engine_root",
     "get_save_dir",
-    "internal_root",
+    "mutex_wrapper",
 ]
diff --git a/voicevox_engine/utility/path_utility.py b/voicevox_engine/utility/path_utility.py
index 6c5c36ca2..7c46ad40b 100644
--- a/voicevox_engine/utility/path_utility.py
+++ b/voicevox_engine/utility/path_utility.py
@@ -2,51 +2,18 @@
 import sys
 import traceback
 from pathlib import Path
-from typing import Literal
 
 from platformdirs import user_data_dir
 
 
-def _runtime_type() -> Literal["nuitka", "pyinstaller", "python"]:
-    """
-    コンパイルに使用したライブラリ名を返す。
-    コンパイルしていない場合は"python"を返す。
-    """
-    # nuitkaビルドをした際はグローバルに__compiled__が含まれる
-    if "__compiled__" in globals():
-        return "nuitka"
-
-    # pyinstallerでビルドをした際はsys.frozenが設定される
-    elif getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"):
-        return "pyinstaller"
-
-    return "python"
-
-
 def engine_root() -> Path:
-    """
-    開発環境ではリポジトリのルートディレクトリを返す。
-    コンパイル後は実行ファイルがあるディレクトリを返す。
-    """
-    runtime = _runtime_type()
-    if runtime == "nuitka":
-        root_dir = Path(sys.argv[0]).parent
-
-    elif runtime == "pyinstaller":
-        root_dir = Path(sys.executable).parent
-
-    else:
+    if is_development():
         root_dir = Path(__file__).parents[2]
 
-    return root_dir.resolve(strict=True)
-
+    # Nuitka/Pyinstallerでビルドされている場合
+    else:
+        root_dir = Path(sys.argv[0]).parent
 
-def internal_root() -> Path:
-    """
-    コンパイル時に収集された実行ファイル内部用のルートディレクトリを返す。
-    開発環境ではリポジトリのルートディレクトリを返す。
-    """
-    root_dir = Path(__file__).parents[2]
     return root_dir.resolve(strict=True)
 
 
@@ -55,7 +22,15 @@ def is_development() -> bool:
     開発版かどうか判定する関数
     Nuitka/Pyinstallerでコンパイルされていない場合は開発環境とする。
     """
-    return _runtime_type() == "python"
+    # nuitkaビルドをした際はグローバルに__compiled__が含まれる
+    if "__compiled__" in globals():
+        return False
+
+    # pyinstallerでビルドをした際はsys.frozenが設定される
+    elif getattr(sys, "frozen", False):
+        return False
+
+    return True
 
 
 def get_save_dir():

From cf014df3e5dec286df2f664116dd245436b48837 Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Sun, 21 Jan 2024 17:27:58 +0900
Subject: [PATCH 145/177] =?UTF-8?q?[release-0.15]=20RESOURCE=E3=82=920.15.?=
 =?UTF-8?q?0=E3=81=AB=20(#1024)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

リソースを0.15.0に
---
 .github/workflows/build-docker.yml | 2 +-
 .github/workflows/build.yml        | 2 +-
 Dockerfile                         | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/build-docker.yml b/.github/workflows/build-docker.yml
index 73be54d12..11745791d 100644
--- a/.github/workflows/build-docker.yml
+++ b/.github/workflows/build-docker.yml
@@ -15,7 +15,7 @@ on:
 env:
   IMAGE_NAME: ${{ vars.DOCKERHUB_USERNAME }}/voicevox_engine
   PYTHON_VERSION: "3.11.3"
-  VOICEVOX_RESOURCE_VERSION: "0.14.5"
+  VOICEVOX_RESOURCE_VERSION: "0.15.0"
   VOICEVOX_CORE_VERSION: "0.14.6"
 
 defaults:
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index d16170dd6..a223b3950 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -26,7 +26,7 @@ on:
 
 env:
   PYTHON_VERSION: "3.11.3"
-  VOICEVOX_RESOURCE_VERSION: "0.14.5"
+  VOICEVOX_RESOURCE_VERSION: "0.15.0"
   VOICEVOX_CORE_VERSION: "0.14.6"
 
 defaults:
diff --git a/Dockerfile b/Dockerfile
index e4c4426fa..9b8f0c5a5 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -275,7 +275,7 @@ RUN <<EOF
 EOF
 
 # Download Resource
-ARG VOICEVOX_RESOURCE_VERSION=0.14.5
+ARG VOICEVOX_RESOURCE_VERSION=0.15.0
 RUN <<EOF
     set -eux
 

From 8859cc0c340decb1d375cc6bc4d3d7d1f2652306 Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Sun, 21 Jan 2024 23:13:05 +0900
Subject: [PATCH 146/177] =?UTF-8?q?[release-0.15]=200.15.1=E7=94=A8?=
 =?UTF-8?q?=E3=81=AE=E6=9B=B4=E6=96=B0=E6=83=85=E5=A0=B1=E3=82=92=E6=9B=B4?=
 =?UTF-8?q?=E6=96=B0=20(#1025)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* 0.15.1用の更新情報更新

* Mac

* ビルド成果物のディレクトリ構造を元に戻した
---
 engine_manifest_assets/update_infos.json | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/engine_manifest_assets/update_infos.json b/engine_manifest_assets/update_infos.json
index 5c876d4d8..581f41d2c 100644
--- a/engine_manifest_assets/update_infos.json
+++ b/engine_manifest_assets/update_infos.json
@@ -1,4 +1,9 @@
 [
+  {
+    "version": "0.15.1",
+    "descriptions": ["ビルド成果物のディレクトリ構造を元に戻した"],
+    "contributors": []
+  },
   {
     "version": "0.15.0",
     "descriptions": [

From 7bc1b210a82f46fa4520b05106bb1891ca3635e6 Mon Sep 17 00:00:00 2001
From: Yuto Ashida <y-chan@y-chan.dev>
Date: Mon, 22 Jan 2024 23:25:45 +0900
Subject: [PATCH 147/177] =?UTF-8?q?[project-s]=20=E3=83=8F=E3=83=9F?=
 =?UTF-8?q?=E3=83=B3=E3=82=B0=E7=94=A8API=E3=82=92=E8=BF=BD=E5=8A=A0=20(#1?=
 =?UTF-8?q?008)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* update metas (add style type)

* update engine manifest (add frame rate)

* add sing api to core wrapper

* add sing api to core adapter

* add models for sing api

* add sing process to tts engine

* add sing api

* fix miss

* add fixme comment

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>

* remove sing type

* fix typo

* remove optional

* translate error detail

* get -> create

* fix docs

* Revert "remove optional"

This reverts commit 12b8fc6413049c115f98035b07a74f12394ea5de.

* fix pytest

* add comment

* add fixme comment

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>

* improve models

---------

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 engine_manifest.json                          |   1 +
 run.py                                        |  73 ++++++
 .../test_fetch_speakers_success.json          |  30 ++-
 voicevox_engine/core_adapter.py               |  81 +++++++
 voicevox_engine/core_wrapper.py               | 209 ++++++++++++++++++
 .../engine_manifest/EngineManifest.py         |   1 +
 .../engine_manifest/EngineManifestLoader.py   |   1 +
 voicevox_engine/metas/Metas.py                |   4 +-
 voicevox_engine/model.py                      |  40 ++++
 voicevox_engine/tts_pipeline/tts_engine.py    | 204 ++++++++++++++++-
 10 files changed, 627 insertions(+), 17 deletions(-)

diff --git a/engine_manifest.json b/engine_manifest.json
index f4a5dfaa1..c4dd5f352 100644
--- a/engine_manifest.json
+++ b/engine_manifest.json
@@ -9,6 +9,7 @@
     "port": 50021,
     "icon": "engine_manifest_assets/icon.png",
     "default_sampling_rate": 24000,
+    "frame_rate": 93.75,
     "terms_of_service": "engine_manifest_assets/terms_of_service.md",
     "update_infos": "engine_manifest_assets/update_infos.json",
     "dependency_licenses": "engine_manifest_assets/dependency_licenses.json",
diff --git a/run.py b/run.py
index 7041028cb..1843ba04c 100644
--- a/run.py
+++ b/run.py
@@ -42,10 +42,12 @@
     AudioQuery,
     BaseLibraryInfo,
     DownloadableLibraryInfo,
+    FrameAudioQuery,
     InstalledLibraryInfo,
     MorphableTargetInfo,
     ParseKanaBadRequest,
     ParseKanaError,
+    Score,
     Speaker,
     SpeakerInfo,
     StyleIdNotFoundError,
@@ -704,6 +706,77 @@ def _synthesis_morphing(
             background=BackgroundTask(delete_file, f.name),
         )
 
+    @app.post(
+        "/sing_frame_audio_query",
+        response_model=FrameAudioQuery,
+        tags=["クエリ作成"],
+        summary="歌唱音声合成用のクエリを作成する",
+    )
+    def sing_frame_audio_query(
+        score: Score,
+        style_id: StyleId | None = Query(default=None),  # noqa: B008
+        speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
+        core_version: str | None = None,
+    ) -> FrameAudioQuery:
+        """
+        歌唱音声合成用のクエリの初期値を得ます。ここで得られたクエリはそのまま歌唱音声合成に利用できます。各値の意味は`Schemas`を参照してください。
+        """
+        style_id = get_style_id_from_deprecated(
+            style_id=style_id, deprecated_speaker=speaker
+        )
+        engine = get_engine(core_version)
+        core = get_core(core_version)
+        phonemes, f0, volume = engine.create_sing_phoneme_and_f0_and_volume(
+            score, style_id
+        )
+
+        return FrameAudioQuery(
+            f0=f0,
+            volume=volume,
+            phonemes=phonemes,
+            volumeScale=1,
+            outputSamplingRate=core.default_sampling_rate,
+            outputStereo=False,
+        )
+
+    @app.post(
+        "/frame_synthesis",
+        response_class=FileResponse,
+        responses={
+            200: {
+                "content": {
+                    "audio/wav": {"schema": {"type": "string", "format": "binary"}}
+                },
+            }
+        },
+        tags=["音声合成"],
+    )
+    def frame_synthesis(
+        query: FrameAudioQuery,
+        style_id: StyleId | None = Query(default=None),  # noqa: B008
+        speaker: StyleId | None = Query(default=None, deprecated=True),  # noqa: B008
+        core_version: str | None = None,
+    ) -> FileResponse:
+        """
+        歌唱音声合成を行います。
+        """
+        style_id = get_style_id_from_deprecated(
+            style_id=style_id, deprecated_speaker=speaker
+        )
+        engine = get_engine(core_version)
+        wave = engine.frame_synthsize_wave(query, style_id)
+
+        with NamedTemporaryFile(delete=False) as f:
+            soundfile.write(
+                file=f, data=wave, samplerate=query.outputSamplingRate, format="WAV"
+            )
+
+        return FileResponse(
+            f.name,
+            media_type="audio/wav",
+            background=BackgroundTask(delete_file, f.name),
+        )
+
     @app.post(
         "/connect_waves",
         response_class=FileResponse,
diff --git a/test/e2e/__snapshots__/test_validate_speakers/test_fetch_speakers_success.json b/test/e2e/__snapshots__/test_validate_speakers/test_fetch_speakers_success.json
index f948bf0f1..9464bec39 100644
--- a/test/e2e/__snapshots__/test_validate_speakers/test_fetch_speakers_success.json
+++ b/test/e2e/__snapshots__/test_validate_speakers/test_fetch_speakers_success.json
@@ -5,19 +5,23 @@
     "styles": [
       {
         "id": 0,
-        "name": "style0"
+        "name": "style0",
+        "type": null
       },
       {
         "id": 2,
-        "name": "style1"
+        "name": "style1",
+        "type": null
       },
       {
         "id": 4,
-        "name": "style2"
+        "name": "style2",
+        "type": null
       },
       {
         "id": 6,
-        "name": "style3"
+        "name": "style3",
+        "type": null
       }
     ],
     "supported_features": {
@@ -31,19 +35,23 @@
     "styles": [
       {
         "id": 1,
-        "name": "style0"
+        "name": "style0",
+        "type": null
       },
       {
         "id": 3,
-        "name": "style1"
+        "name": "style1",
+        "type": null
       },
       {
         "id": 5,
-        "name": "style2"
+        "name": "style2",
+        "type": null
       },
       {
         "id": 7,
-        "name": "style3"
+        "name": "style3",
+        "type": null
       }
     ],
     "supported_features": {
@@ -57,7 +65,8 @@
     "styles": [
       {
         "id": 8,
-        "name": "style0"
+        "name": "style0",
+        "type": null
       }
     ],
     "supported_features": {
@@ -71,7 +80,8 @@
     "styles": [
       {
         "id": 9,
-        "name": "style0"
+        "name": "style0",
+        "type": null
       }
     ],
     "supported_features": {
diff --git a/voicevox_engine/core_adapter.py b/voicevox_engine/core_adapter.py
index 2fe77349a..5ff5d61f2 100644
--- a/voicevox_engine/core_adapter.py
+++ b/voicevox_engine/core_adapter.py
@@ -143,3 +143,84 @@ def safe_decode_forward(
             )
         sr_wave = self.default_sampling_rate
         return wave, sr_wave
+
+    def safe_predict_sing_consonant_length_forward(
+        self,
+        consonant: NDArray[np.int64],
+        vowel: NDArray[np.int64],
+        note_duration: NDArray[np.int64],
+        style_id: StyleId,
+    ) -> NDArray[np.int64]:
+        # 「指定スタイルを初期化」「mutexによる安全性」「系列長・データ型に関するアダプター」を提供する
+        self.initialize_style_id_synthesis(style_id, skip_reinit=True)
+
+        with self.mutex:
+            consonant_length = self.core.predict_sing_consonant_length_forward(
+                length=consonant.shape[0],
+                consonant=consonant[np.newaxis],
+                vowel=vowel[np.newaxis],
+                note_duration=note_duration[np.newaxis],
+                style_id=np.array(style_id, dtype=np.int64).reshape(-1),
+            )
+
+        return consonant_length
+
+    def safe_predict_sing_f0_forward(
+        self,
+        phoneme: NDArray[np.int64],
+        note: NDArray[np.int64],
+        style_id: StyleId,
+    ) -> NDArray[np.float32]:
+        # 「指定スタイルを初期化」「mutexによる安全性」「系列長・データ型に関するアダプター」を提供する
+        self.initialize_style_id_synthesis(style_id, skip_reinit=True)
+
+        with self.mutex:
+            f0 = self.core.predict_sing_f0_forward(
+                length=phoneme.shape[0],
+                phoneme=phoneme[np.newaxis],
+                note=note[np.newaxis],
+                style_id=np.array(style_id, dtype=np.int64).reshape(-1),
+            )
+
+        return f0
+
+    def safe_predict_sing_volume_forward(
+        self,
+        phoneme: NDArray[np.int64],
+        note: NDArray[np.int64],
+        f0: NDArray[np.float32],
+        style_id: StyleId,
+    ) -> NDArray[np.float32]:
+        # 「指定スタイルを初期化」「mutexによる安全性」「系列長・データ型に関するアダプター」を提供する
+        self.initialize_style_id_synthesis(style_id, skip_reinit=True)
+
+        with self.mutex:
+            volume = self.core.predict_sing_volume_forward(
+                length=phoneme.shape[0],
+                phoneme=phoneme[np.newaxis],
+                note=note[np.newaxis],
+                f0=f0[np.newaxis],
+                style_id=np.array(style_id, dtype=np.int64).reshape(-1),
+            )
+
+        return volume
+
+    def safe_sf_decode_forward(
+        self,
+        phoneme: NDArray[np.int64],
+        f0: NDArray[np.float32],
+        volume: NDArray[np.float32],
+        style_id: StyleId,
+    ) -> tuple[NDArray[np.float32], int]:
+        # 「指定スタイルを初期化」「mutexによる安全性」「系列長・データ型に関するアダプター」を提供する
+        self.initialize_style_id_synthesis(style_id, skip_reinit=True)
+        with self.mutex:
+            wave = self.core.sf_decode_forward(
+                length=phoneme.shape[0],
+                phoneme=phoneme[np.newaxis],
+                f0=f0[np.newaxis],
+                volume=volume[np.newaxis],
+                style_id=np.array(style_id, dtype=np.int64).reshape(-1),
+            )
+        sr_wave = self.default_sampling_rate
+        return wave, sr_wave
diff --git a/voicevox_engine/core_wrapper.py b/voicevox_engine/core_wrapper.py
index 83fa5f417..d8fb10f7f 100644
--- a/voicevox_engine/core_wrapper.py
+++ b/voicevox_engine/core_wrapper.py
@@ -432,6 +432,57 @@ def _type_decode_forward(core_cdll: CDLL) -> None:
     core_cdll.decode_forward.restype = c_bool
 
 
+def _type_predict_sing_consonant_length_forward(core_cdll: CDLL) -> None:
+    """コアDLL `predict_sing_consonant_length_forward` 関数を型付けする"""
+    core_cdll.predict_sing_consonant_length_forward.argtypes = (
+        c_int,
+        POINTER(c_long),
+        POINTER(c_long),
+        POINTER(c_long),
+        POINTER(c_long),
+        POINTER(c_long),
+    )
+    core_cdll.predict_sing_consonant_length_forward.restype = c_bool
+
+
+def _type_predict_sing_f0_forward(core_cdll: CDLL) -> None:
+    """コアDLL `predict_sing_f0_forward` 関数を型付けする"""
+    core_cdll.predict_sing_f0_forward.argtypes = (
+        c_int,
+        POINTER(c_long),
+        POINTER(c_long),
+        POINTER(c_long),
+        POINTER(c_float),
+    )
+    core_cdll.predict_sing_f0_forward.restype = c_bool
+
+
+def _type_predict_sing_volume_forward(core_cdll: CDLL) -> None:
+    """コアDLL `predict_sing_volume_forward` 関数を型付けする"""
+    core_cdll.predict_sing_volume_forward.argtypes = (
+        c_int,
+        POINTER(c_long),
+        POINTER(c_long),
+        POINTER(c_float),
+        POINTER(c_long),
+        POINTER(c_float),
+    )
+    core_cdll.predict_sing_volume_forward.restype = c_bool
+
+
+def _type_sf_decode_forward(core_cdll: CDLL) -> None:
+    """コアDLL `sf_decode_forward` 関数を型付けする"""
+    core_cdll.sf_decode_forward.argtypes = (
+        c_int,
+        POINTER(c_long),
+        POINTER(c_float),
+        POINTER(c_float),
+        POINTER(c_long),
+        POINTER(c_float),
+    )
+    core_cdll.sf_decode_forward.restype = c_bool
+
+
 def _type_last_error_message(core_cdll: CDLL) -> None:
     """コアDLL `last_error_message` 関数を型付けする"""
     core_cdll.last_error_message.restype = c_char_p
@@ -477,6 +528,10 @@ def __init__(
         _type_yukarin_s_forward(self.core)
         _type_yukarin_sa_forward(self.core)
         _type_decode_forward(self.core)
+        _type_predict_sing_consonant_length_forward(self.core)
+        _type_predict_sing_f0_forward(self.core)
+        _type_predict_sing_volume_forward(self.core)
+        _type_sf_decode_forward(self.core)
         _type_last_error_message(self.core)
 
         self.exist_supported_devices = False
@@ -656,6 +711,160 @@ def decode_forward(
         )
         return output
 
+    def predict_sing_consonant_length_forward(
+        self,
+        length: int,
+        consonant: NDArray[np.int64],
+        vowel: NDArray[np.int64],
+        note_duration: NDArray[np.int64],
+        style_id: NDArray[np.int64],
+    ) -> NDArray[np.int64]:
+        """
+        子音・母音列から、音素ごとの長さを求める関数
+        Parameters
+        ----------
+        length : int
+            音素列の長さ
+        consonant : NDArray[np.int64]
+            子音列
+        vowel : NDArray[np.int64]
+            母音列
+        note_duration : NDArray[np.int64]
+            ノート列
+        style_id : NDArray[np.int64]
+            スタイル番号
+        Returns
+        -------
+        output : NDArray[np.int64]
+            子音長
+        """
+        output = np.zeros((length,), dtype=np.int64)
+        self.assert_core_success(
+            self.core.predict_sing_consonant_length_forward(
+                c_int(length),
+                consonant.ctypes.data_as(POINTER(c_long)),
+                vowel.ctypes.data_as(POINTER(c_long)),
+                note_duration.ctypes.data_as(POINTER(c_long)),
+                style_id.ctypes.data_as(POINTER(c_long)),
+                output.ctypes.data_as(POINTER(c_long)),
+            )
+        )
+        return output
+
+    def predict_sing_f0_forward(
+        self,
+        length: int,
+        phoneme: NDArray[np.int64],
+        note: NDArray[np.int64],
+        style_id: NDArray[np.int64],
+    ) -> NDArray[np.float32]:
+        """
+        フレームごとの音素列とノート列から、フレームごとのF0を求める関数
+        Parameters
+        ----------
+        length : int
+            音素列の長さ
+        phoneme : NDArray[np.int64]
+            音素列
+        note : NDArray[np.int64]
+            ノート列
+        style_id : NDArray[np.int64]
+            スタイル番号
+        Returns
+        -------
+        output : NDArray[np.float32]
+            フレームごとのF0
+        """
+        output = np.zeros((length,), dtype=np.float32)
+        self.assert_core_success(
+            self.core.predict_sing_f0_forward(
+                c_int(length),
+                phoneme.ctypes.data_as(POINTER(c_long)),
+                note.ctypes.data_as(POINTER(c_long)),
+                style_id.ctypes.data_as(POINTER(c_long)),
+                output.ctypes.data_as(POINTER(c_float)),
+            )
+        )
+        return output
+
+    def predict_sing_volume_forward(
+        self,
+        length: int,
+        phoneme: NDArray[np.int64],
+        note: NDArray[np.int64],
+        f0: NDArray[np.float32],
+        style_id: NDArray[np.int64],
+    ) -> NDArray[np.float32]:
+        """
+        フレームごとの音素列とノート列から、フレームごとのvolumeを求める関数
+        Parameters
+        ----------
+        length : int
+            音素列の長さ
+        phoneme : NDArray[np.int64]
+            音素列
+        note : NDArray[np.int64]
+            ノート列
+        style_id : NDArray[np.int64]
+            スタイル番号
+        Returns
+        -------
+        output : NDArray[np.float32]
+            フレームごとの音量
+        """
+        output = np.zeros((length,), dtype=np.float32)
+        self.assert_core_success(
+            self.core.predict_sing_volume_forward(
+                c_int(length),
+                phoneme.ctypes.data_as(POINTER(c_long)),
+                note.ctypes.data_as(POINTER(c_long)),
+                f0.ctypes.data_as(POINTER(c_float)),
+                style_id.ctypes.data_as(POINTER(c_long)),
+                output.ctypes.data_as(POINTER(c_float)),
+            )
+        )
+        return output
+
+    def sf_decode_forward(
+        self,
+        length: int,
+        phoneme: NDArray[np.int64],
+        f0: NDArray[np.float32],
+        volume: NDArray[np.float32],
+        style_id: NDArray[np.int64],
+    ) -> NDArray[np.float32]:
+        """
+        フレームごとの音素と音高から波形を求める関数
+        Parameters
+        ----------
+        length : int
+            フレームの長さ
+        phoneme : NDArray[np.int64]
+            フレームごとの音素
+        f0 : NDArray[np.float32]
+            フレームごとの音高
+        volume : NDArray[np.float32]
+            フレームごとの音量
+        style_id : NDArray[np.int64]
+            スタイル番号
+        Returns
+        -------
+        output : NDArray[np.float32]
+            音声波形
+        """
+        output = np.zeros((length * 256,), dtype=np.float32)
+        self.assert_core_success(
+            self.core.sf_decode_forward(
+                c_int(length),
+                phoneme.ctypes.data_as(POINTER(c_long)),
+                f0.ctypes.data_as(POINTER(c_float)),
+                volume.ctypes.data_as(POINTER(c_float)),
+                style_id.ctypes.data_as(POINTER(c_long)),
+                output.ctypes.data_as(POINTER(c_float)),
+            )
+        )
+        return output
+
     def supported_devices(self) -> str:
         """
         coreから取得した対応デバイスに関するjsonデータの文字列
diff --git a/voicevox_engine/engine_manifest/EngineManifest.py b/voicevox_engine/engine_manifest/EngineManifest.py
index a203767aa..f3a02e173 100644
--- a/voicevox_engine/engine_manifest/EngineManifest.py
+++ b/voicevox_engine/engine_manifest/EngineManifest.py
@@ -57,6 +57,7 @@ class EngineManifest(BaseModel):
     url: str = Field(title="エンジンのURL")
     icon: str = Field(title="エンジンのアイコンをBASE64エンコードしたもの")
     default_sampling_rate: int = Field(title="デフォルトのサンプリング周波数")
+    frame_rate: float = Field(title="エンジンのフレームレート")
     terms_of_service: str = Field(title="エンジンの利用規約")
     update_infos: List[UpdateInfo] = Field(title="エンジンのアップデート情報")
     dependency_licenses: List[LicenseInfo] = Field(title="依存関係のライセンス情報")
diff --git a/voicevox_engine/engine_manifest/EngineManifestLoader.py b/voicevox_engine/engine_manifest/EngineManifestLoader.py
index 5f6f2199d..5335dd2bc 100644
--- a/voicevox_engine/engine_manifest/EngineManifestLoader.py
+++ b/voicevox_engine/engine_manifest/EngineManifestLoader.py
@@ -20,6 +20,7 @@ def load_manifest(self) -> EngineManifest:
             uuid=manifest["uuid"],
             url=manifest["url"],
             default_sampling_rate=manifest["default_sampling_rate"],
+            frame_rate=manifest["frame_rate"],
             icon=b64encode((self.root_dir / manifest["icon"]).read_bytes()).decode(
                 "utf-8"
             ),
diff --git a/voicevox_engine/metas/Metas.py b/voicevox_engine/metas/Metas.py
index bc615a16f..39c45cb64 100644
--- a/voicevox_engine/metas/Metas.py
+++ b/voicevox_engine/metas/Metas.py
@@ -1,11 +1,12 @@
 from enum import Enum
-from typing import List, NewType, Optional
+from typing import List, Literal, NewType, Optional
 
 from pydantic import BaseModel, Field
 
 # NOTE: 循環importを防ぐためにとりあえずここに書いている
 # FIXME: 他のmodelに依存せず、全modelから参照できる場所に配置する
 StyleId = NewType("StyleId", int)
+StyleType = Literal["talk", "humming", "sing_teacher"]
 
 
 class SpeakerStyle(BaseModel):
@@ -15,6 +16,7 @@ class SpeakerStyle(BaseModel):
 
     name: str = Field(title="スタイル名")
     id: StyleId = Field(title="スタイルID")
+    type: Optional[StyleType] = Field(title="モデルの種類")
 
 
 class SpeakerSupportPermittedSynthesisMorphing(str, Enum):
diff --git a/voicevox_engine/model.py b/voicevox_engine/model.py
index cd92cab4f..f9e50b62c 100644
--- a/voicevox_engine/model.py
+++ b/voicevox_engine/model.py
@@ -72,6 +72,46 @@ def __hash__(self):
         return hash(tuple(sorted(items)))
 
 
+class Note(BaseModel):
+    """
+    音符ごとの情報
+    """
+
+    key: int | None = Field(title="音階")
+    frame_length: int = Field(title="音符のフレーム長")
+    lyric: str = Field(title="音符の歌詞")
+
+
+class Score(BaseModel):
+    """
+    楽譜情報
+    """
+
+    notes: List[Note] = Field(title="音符のリスト")
+
+
+class FramePhoneme(BaseModel):
+    """
+    音素の情報
+    """
+
+    phoneme: str = Field(title="音素")
+    frame_length: int = Field(title="音素のフレーム長")
+
+
+class FrameAudioQuery(BaseModel):
+    """
+    フレームごとの音声合成用のクエリ
+    """
+
+    f0: List[float] = Field(title="フレームごとの基本周波数")
+    volume: List[float] = Field(title="フレームごとの音量")
+    phonemes: List[FramePhoneme] = Field(title="音素のリスト")
+    volumeScale: float = Field(title="全体の音量")
+    outputSamplingRate: int = Field(title="音声データの出力サンプリングレート")
+    outputStereo: bool = Field(title="音声データをステレオ出力するか否か")
+
+
 class ParseKanaErrorCode(Enum):
     UNKNOWN_TEXT = "判別できない読み仮名があります: {text}"
     ACCENT_TOP = "句頭にアクセントは置けません: {text}"
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index 610330bda..ceb2843b7 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -2,16 +2,19 @@
 import math
 
 import numpy as np
+from fastapi import HTTPException
 from numpy.typing import NDArray
 from soxr import resample
 
 from ..core_adapter import CoreAdapter
 from ..core_wrapper import CoreWrapper
 from ..metas.Metas import StyleId
-from ..model import AccentPhrase, AudioQuery, Mora
+from ..model import AccentPhrase, AudioQuery, FrameAudioQuery, Mora
+from ..model import FramePhoneme
+from ..model import Score
 from .acoustic_feature_extractor import Phoneme
 from .kana_converter import parse_kana
-from .mora_list import mora_phonemes_to_mora_kana
+from .mora_list import mora_kana_to_mora_phonemes, mora_phonemes_to_mora_kana
 from .text_analyzer import text_to_accent_phrases
 
 # 疑問文語尾定数
@@ -172,14 +175,14 @@ def apply_intonation_scale(moras: list[Mora], query: AudioQuery) -> list[Mora]:
 
 
 def apply_volume_scale(
-    wave: NDArray[np.float32], query: AudioQuery
+    wave: NDArray[np.float32], query: AudioQuery | FrameAudioQuery
 ) -> NDArray[np.float32]:
     """音声波形へ音声合成用のクエリがもつ音量スケール（`volumeScale`）を適用する"""
     return wave * query.volumeScale
 
 
 def apply_output_sampling_rate(
-    wave: NDArray[np.float32], sr_wave: float, query: AudioQuery
+    wave: NDArray[np.float32], sr_wave: float, query: AudioQuery | FrameAudioQuery
 ) -> NDArray[np.float32]:
     """音声波形へ音声合成用のクエリがもつ出力サンプリングレート（`outputSamplingRate`）を適用する"""
     # サンプリングレート一致のときはスルー
@@ -190,7 +193,7 @@ def apply_output_sampling_rate(
 
 
 def apply_output_stereo(
-    wave: NDArray[np.float32], query: AudioQuery
+    wave: NDArray[np.float32], query: AudioQuery | FrameAudioQuery
 ) -> NDArray[np.float32]:
     """音声波形へ音声合成用のクエリがもつステレオ出力設定（`outputStereo`）を適用する"""
     if query.outputStereo:
@@ -223,7 +226,7 @@ def query_to_decoder_feature(
 
 
 def raw_wave_to_output_wave(
-    query: AudioQuery, wave: NDArray[np.float32], sr_wave: int
+    query: AudioQuery | FrameAudioQuery, wave: NDArray[np.float32], sr_wave: int
 ) -> NDArray[np.float32]:
     """生音声波形に音声合成用のクエリを適用して出力音声波形を生成する"""
     wave = apply_volume_scale(wave, query)
@@ -232,6 +235,55 @@ def raw_wave_to_output_wave(
     return wave
 
 
+def _hira_to_kana(text: str) -> str:
+    """Convert hiragana in `text` to katakana, leaving other characters untouched."""
+    return text.translate({cp: cp + 96 for cp in range(ord("ぁ"), ord("ゔ") + 1)})
+
+
+def calc_phoneme_lengths(
+    consonant_lengths: NDArray[np.int64],
+    note_durations: NDArray[np.int64],
+) -> NDArray[np.int64]:
+    """
+    Compute per-phoneme frame lengths from consonant lengths and note lengths.
+
+    Vowels must start at the head of their note, so each note's consonant is
+    carved out of the tail of the previous note. A consonant length that is
+    negative or longer than the previous note is replaced by half of that
+    note. Zero-length consonants are omitted from the result.
+    The input arrays are not modified.
+
+    Raises HTTPException(400) when there are two or more notes and the first
+    note's consonant length is not 0 (the first note must be a pau).
+    """
+    num_notes = len(consonant_lengths)
+    # The first note must be a pau, i.e. have a consonant length of 0.
+    # (Checked only when a second note exists, as before.)
+    if num_notes > 1 and consonant_lengths[0] != 0:
+        raise HTTPException(
+            status_code=400,
+            detail=f"consonant_lengths[0] must be 0, but {consonant_lengths[0]}",
+        )
+
+    phoneme_durations = []
+    for i in range(num_notes - 1):
+        note_duration = note_durations[i]
+        next_consonant_length = consonant_lengths[i + 1]
+        # Negative, or longer than the current note (vowel would be negative):
+        # fall back to half of the current note. Kept local on purpose so the
+        # caller's array is left untouched.
+        if next_consonant_length < 0 or next_consonant_length > note_duration:
+            next_consonant_length = note_duration // 2
+        phoneme_durations.append(note_duration - next_consonant_length)
+        if next_consonant_length > 0:
+            phoneme_durations.append(next_consonant_length)
+    # The last note has no following consonant; its vowel fills the whole note.
+    if num_notes > 0:
+        phoneme_durations.append(note_durations[num_notes - 1])
+
+    return np.array(phoneme_durations, dtype=np.int64)
+
+
 class TTSEngine:
     """音声合成器（core）の管理/実行/プロキシと音声合成フロー"""
 
@@ -374,6 +426,146 @@ def synthesize_wave(
         wave = raw_wave_to_output_wave(query, raw_wave, sr_raw_wave)
         return wave
 
+    # FIXME: move this to a sing-specific engine, or rename the class
+    # Decide on a collective name for the returned values and rename the function
+    def create_sing_phoneme_and_f0_and_volume(
+        self,
+        score: Score,
+        style_id: StyleId,
+    ) -> tuple[list[FramePhoneme], list[float], list[float]]:
+        """Generate phonemes, f0 and volume from a singing score and a style ID."""
+        notes = score.notes
+
+        # Break the score down into per-note data and per-phoneme data
+        note_lengths: list[int] = []
+        note_consonants: list[int] = []
+        note_vowels: list[int] = []
+        phonemes: list[int] = []
+        phoneme_keys: list[int] = []
+
+        for note in notes:
+            if note.lyric == "":
+                if note.key is not None:
+                    raise HTTPException(
+                        status_code=400,
+                        detail="lyricが空文字列の場合、keyはnullである必要があります。",
+                    )
+                note_lengths.append(note.frame_length)
+                note_consonants.append(-1)  # -1: no consonant
+                note_vowels.append(0)  # pau
+                phonemes.append(0)  # pau
+                phoneme_keys.append(-1)  # -1: no key (rest)
+            else:
+                if note.key is None:
+                    raise HTTPException(
+                        status_code=400,
+                        detail="keyがnullの場合、lyricは空文字列である必要があります。",
+                    )
+
+                # TODO: handle notes whose lyric contains multiple moras
+                mora_phonemes = mora_kana_to_mora_phonemes.get(
+                    note.lyric  # type: ignore
+                ) or mora_kana_to_mora_phonemes.get(
+                    _hira_to_kana(note.lyric)  # type: ignore
+                )
+                if mora_phonemes is None:
+                    raise HTTPException(
+                        status_code=400,
+                        detail=f"lyricが不正です: {note.lyric}",
+                    )
+
+                consonant, vowel = mora_phonemes
+                if consonant is None:
+                    consonant_id = -1
+                else:
+                    consonant_id = Phoneme(consonant).id
+                vowel_id = Phoneme(vowel).id
+
+                note_lengths.append(note.frame_length)
+                note_consonants.append(consonant_id)
+                note_vowels.append(vowel_id)
+                if consonant_id != -1:
+                    phonemes.append(consonant_id)
+                    phoneme_keys.append(note.key)
+                phonemes.append(vowel_id)
+                phoneme_keys.append(note.key)
+
+        # Convert each list to a numpy array
+        note_lengths_array = np.array(note_lengths, dtype=np.int64)
+        note_consonants_array = np.array(note_consonants, dtype=np.int64)
+        note_vowels_array = np.array(note_vowels, dtype=np.int64)
+        phonemes_array = np.array(phonemes, dtype=np.int64)
+        phoneme_keys_array = np.array(phoneme_keys, dtype=np.int64)
+
+        # Predict consonant lengths with the core
+        consonant_lengths = self._core.safe_predict_sing_consonant_length_forward(
+            note_consonants_array, note_vowels_array, note_lengths_array, style_id
+        )
+
+        # Derive all phoneme lengths from the predicted consonant lengths
+        phoneme_lengths = calc_phoneme_lengths(consonant_lengths, note_lengths_array)
+
+        # Expand to frame scale. NOTE(review): needs len(phonemes) == len(phoneme_lengths)
+        frame_phonemes = np.repeat(phonemes_array, phoneme_lengths)
+        frame_keys = np.repeat(phoneme_keys_array, phoneme_lengths)
+
+        # Predict f0 with the core
+        f0s = self._core.safe_predict_sing_f0_forward(
+            frame_phonemes, frame_keys, style_id
+        )
+
+        # Predict volume with the core
+        # FIXME: is the plural "s" in these variable names needed?
+        volumes = self._core.safe_predict_sing_volume_forward(
+            frame_phonemes, frame_keys, f0s, style_id
+        )
+
+        phoneme_data_list = [
+            FramePhoneme(
+                phoneme=Phoneme._PHONEME_LIST[phoneme_id],
+                frame_length=phoneme_duration,
+            )
+            for phoneme_id, phoneme_duration in zip(phonemes, phoneme_lengths)
+        ]
+
+        return phoneme_data_list, f0s.tolist(), volumes.tolist()
+
+    def frame_synthsize_wave(
+        self,
+        frame_audio_query: FrameAudioQuery,
+        style_id: StyleId,
+    ) -> NDArray[np.float32]:
+        """Generate an audio waveform from a singing-synthesis query and a style ID."""
+
+        # Unpack the query and convert each part to a numpy array
+        phonemes = []
+        phoneme_lengths = []
+
+        for phoneme in frame_audio_query.phonemes:
+            if phoneme.phoneme not in Phoneme._PHONEME_LIST:
+                raise HTTPException(
+                    status_code=400,
+                    detail=f"phoneme {phoneme.phoneme} is not valid",
+                )
+
+            phonemes.append(Phoneme(phoneme.phoneme).id)
+            phoneme_lengths.append(phoneme.frame_length)
+
+        phonemes_array = np.array(phonemes, dtype=np.int64)
+        phoneme_lengths_array = np.array(phoneme_lengths, dtype=np.int64)
+
+        frame_phonemes = np.repeat(phonemes_array, phoneme_lengths_array)
+        f0s = np.array(frame_audio_query.f0, dtype=np.float32)
+        volumes = np.array(frame_audio_query.volume, dtype=np.float32)
+
+        # Synthesize the raw waveform with the core
+        raw_wave, sr_raw_wave = self._core.safe_sf_decode_forward(
+            frame_phonemes, f0s, volumes, style_id
+        )
+
+        wave = raw_wave_to_output_wave(frame_audio_query, raw_wave, sr_raw_wave)
+        return wave
+
 
 def make_tts_engines_from_cores(cores: dict[str, CoreAdapter]) -> dict[str, TTSEngine]:
     """コア一覧からTTSエンジン一覧を生成する"""

From eef3a17ec75fcc7a5b5f74ab7640c8f4e9e31d48 Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Tue, 23 Jan 2024 00:47:53 +0900
Subject: [PATCH 148/177] =?UTF-8?q?manage=5Flibrary=E3=82=92true=E3=81=AB?=
 =?UTF-8?q?=E6=88=BB=E3=81=99?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 engine_manifest.json                          |   2 +-
 ...\343\202\222\347\242\272\350\252\215.json" | 240 ++++++++++++++++++
 2 files changed, 241 insertions(+), 1 deletion(-)

diff --git a/engine_manifest.json b/engine_manifest.json
index bf5db2d44..f6ced765e 100644
--- a/engine_manifest.json
+++ b/engine_manifest.json
@@ -55,7 +55,7 @@
         },
         "manage_library": {
             "type": "bool",
-            "value": false,
+            "value": true,
             "name": "音声ライブラリのインストール・アンインストール"
         }
     }
diff --git "a/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json" "b/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
index d20acdf92..84083909c 100644
--- "a/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
+++ "b/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
@@ -164,6 +164,48 @@
         "title": "CorsPolicyMode",
         "type": "string"
       },
+      "DownloadableLibraryInfo": {
+        "description": "ダウンロード可能な音声ライブラリの情報",
+        "properties": {
+          "bytes": {
+            "title": "音声ライブラリのバイト数",
+            "type": "integer"
+          },
+          "download_url": {
+            "title": "音声ライブラリのダウンロードURL",
+            "type": "string"
+          },
+          "name": {
+            "title": "音声ライブラリの名前",
+            "type": "string"
+          },
+          "speakers": {
+            "items": {
+              "$ref": "#/components/schemas/LibrarySpeaker"
+            },
+            "title": "音声ライブラリに含まれる話者のリスト",
+            "type": "array"
+          },
+          "uuid": {
+            "title": "音声ライブラリのUUID",
+            "type": "string"
+          },
+          "version": {
+            "title": "音声ライブラリのバージョン",
+            "type": "string"
+          }
+        },
+        "required": [
+          "name",
+          "uuid",
+          "version",
+          "download_url",
+          "bytes",
+          "speakers"
+        ],
+        "title": "DownloadableLibraryInfo",
+        "type": "object"
+      },
       "EngineManifest": {
         "description": "エンジン自体に関する情報",
         "properties": {
@@ -255,6 +297,80 @@
         "title": "HTTPValidationError",
         "type": "object"
       },
+      "InstalledLibraryInfo": {
+        "description": "インストール済み音声ライブラリの情報",
+        "properties": {
+          "bytes": {
+            "title": "音声ライブラリのバイト数",
+            "type": "integer"
+          },
+          "download_url": {
+            "title": "音声ライブラリのダウンロードURL",
+            "type": "string"
+          },
+          "name": {
+            "title": "音声ライブラリの名前",
+            "type": "string"
+          },
+          "speakers": {
+            "items": {
+              "$ref": "#/components/schemas/LibrarySpeaker"
+            },
+            "title": "音声ライブラリに含まれる話者のリスト",
+            "type": "array"
+          },
+          "uninstallable": {
+            "title": "アンインストール可能かどうか",
+            "type": "boolean"
+          },
+          "uuid": {
+            "title": "音声ライブラリのUUID",
+            "type": "string"
+          },
+          "version": {
+            "title": "音声ライブラリのバージョン",
+            "type": "string"
+          }
+        },
+        "required": [
+          "name",
+          "uuid",
+          "version",
+          "download_url",
+          "bytes",
+          "speakers",
+          "uninstallable"
+        ],
+        "title": "InstalledLibraryInfo",
+        "type": "object"
+      },
+      "LibrarySpeaker": {
+        "description": "音声ライブラリに含まれる話者の情報",
+        "properties": {
+          "speaker": {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/Speaker"
+              }
+            ],
+            "title": "話者情報"
+          },
+          "speaker_info": {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/SpeakerInfo"
+              }
+            ],
+            "title": "話者の追加情報"
+          }
+        },
+        "required": [
+          "speaker",
+          "speaker_info"
+        ],
+        "title": "LibrarySpeaker",
+        "type": "object"
+      },
       "LicenseInfo": {
         "description": "依存ライブラリのライセンス情報",
         "properties": {
@@ -1270,6 +1386,32 @@
         ]
       }
     },
+    "/downloadable_libraries": {
+      "get": {
+        "description": "ダウンロード可能な音声ライブラリの情報を返します。\n\nReturns\n-------\nret_data: list[DownloadableLibrary]",
+        "operationId": "downloadable_libraries_downloadable_libraries_get",
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "items": {
+                    "$ref": "#/components/schemas/DownloadableLibraryInfo"
+                  },
+                  "title": "Response Downloadable Libraries Downloadable Libraries Get",
+                  "type": "array"
+                }
+              }
+            },
+            "description": "Successful Response"
+          }
+        },
+        "summary": "Downloadable Libraries",
+        "tags": [
+          "音声ライブラリ管理"
+        ]
+      }
+    },
     "/engine_manifest": {
       "get": {
         "operationId": "engine_manifest_engine_manifest_get",
@@ -1398,6 +1540,68 @@
         ]
       }
     },
+    "/install_library/{library_uuid}": {
+      "post": {
+        "description": "音声ライブラリをインストールします。\n音声ライブラリのZIPファイルをリクエストボディとして送信してください。\n\nParameters\n----------\nlibrary_uuid: str\n    音声ライブラリのID",
+        "operationId": "install_library_install_library__library_uuid__post",
+        "parameters": [
+          {
+            "in": "path",
+            "name": "library_uuid",
+            "required": true,
+            "schema": {
+              "title": "Library Uuid",
+              "type": "string"
+            }
+          }
+        ],
+        "responses": {
+          "204": {
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "Install Library",
+        "tags": [
+          "音声ライブラリ管理"
+        ]
+      }
+    },
+    "/installed_libraries": {
+      "get": {
+        "description": "インストールした音声ライブラリの情報を返します。\n\nReturns\n-------\nret_data: dict[str, InstalledLibrary]",
+        "operationId": "installed_libraries_installed_libraries_get",
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "additionalProperties": {
+                    "$ref": "#/components/schemas/InstalledLibraryInfo"
+                  },
+                  "title": "Response Installed Libraries Installed Libraries Get",
+                  "type": "object"
+                }
+              }
+            },
+            "description": "Successful Response"
+          }
+        },
+        "summary": "Installed Libraries",
+        "tags": [
+          "音声ライブラリ管理"
+        ]
+      }
+    },
     "/is_initialized_speaker": {
       "get": {
         "description": "指定されたスタイルが初期化されているかどうかを返します。",
@@ -2159,6 +2363,42 @@
         ]
       }
     },
+    "/uninstall_library/{library_uuid}": {
+      "post": {
+        "description": "音声ライブラリをアンインストールします。\n\nParameters\n----------\nlibrary_uuid: str\n    音声ライブラリのID",
+        "operationId": "uninstall_library_uninstall_library__library_uuid__post",
+        "parameters": [
+          {
+            "in": "path",
+            "name": "library_uuid",
+            "required": true,
+            "schema": {
+              "title": "Library Uuid",
+              "type": "string"
+            }
+          }
+        ],
+        "responses": {
+          "204": {
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "Uninstall Library",
+        "tags": [
+          "音声ライブラリ管理"
+        ]
+      }
+    },
     "/update_preset": {
       "post": {
         "description": "既存のプリセットを更新します\n\nParameters\n-------\npreset: Preset\n    更新するプリセット。\n    プリセットIDが更新対象と一致している必要があります。\n\nReturns\n-------\nid: int\n    更新したプリセットのプリセットID",

From 2b88ff2a13c8552e43851ec335c91b35b358ff3c Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Sun, 28 Jan 2024 06:31:35 +0900
Subject: [PATCH 149/177] =?UTF-8?q?[project-s]=20sing=E8=BF=BD=E5=8A=A0?=
 =?UTF-8?q?=E3=81=97=E3=81=A6=E3=83=87=E3=83=95=E3=82=A9=E3=83=AB=E3=83=88?=
 =?UTF-8?q?=E3=82=92talk=E3=81=AB=E3=81=99=E3=82=8B=20(#1030)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* sing追加してデフォルトをtalkにする

* test
---
 ...\343\202\222\347\242\272\350\252\215.json" |  4 +++-
 .../test_fetch_speakers_success.json          | 20 +++++++++----------
 voicevox_engine/metas/Metas.py                |  4 ++--
 3 files changed, 15 insertions(+), 13 deletions(-)

diff --git "a/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json" "b/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
index adee912c3..c8f168a31 100644
--- "a/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
+++ "b/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
@@ -725,10 +725,12 @@
             "type": "string"
           },
           "type": {
+            "default": "talk",
             "enum": [
               "talk",
               "humming",
-              "sing_teacher"
+              "sing_teacher",
+              "sing"
             ],
             "title": "モデルの種類",
             "type": "string"
diff --git a/test/e2e/__snapshots__/test_validate_speakers/test_fetch_speakers_success.json b/test/e2e/__snapshots__/test_validate_speakers/test_fetch_speakers_success.json
index 9464bec39..fba38bc61 100644
--- a/test/e2e/__snapshots__/test_validate_speakers/test_fetch_speakers_success.json
+++ b/test/e2e/__snapshots__/test_validate_speakers/test_fetch_speakers_success.json
@@ -6,22 +6,22 @@
       {
         "id": 0,
         "name": "style0",
-        "type": null
+        "type": "talk"
       },
       {
         "id": 2,
         "name": "style1",
-        "type": null
+        "type": "talk"
       },
       {
         "id": 4,
         "name": "style2",
-        "type": null
+        "type": "talk"
       },
       {
         "id": 6,
         "name": "style3",
-        "type": null
+        "type": "talk"
       }
     ],
     "supported_features": {
@@ -36,22 +36,22 @@
       {
         "id": 1,
         "name": "style0",
-        "type": null
+        "type": "talk"
       },
       {
         "id": 3,
         "name": "style1",
-        "type": null
+        "type": "talk"
       },
       {
         "id": 5,
         "name": "style2",
-        "type": null
+        "type": "talk"
       },
       {
         "id": 7,
         "name": "style3",
-        "type": null
+        "type": "talk"
       }
     ],
     "supported_features": {
@@ -66,7 +66,7 @@
       {
         "id": 8,
         "name": "style0",
-        "type": null
+        "type": "talk"
       }
     ],
     "supported_features": {
@@ -81,7 +81,7 @@
       {
         "id": 9,
         "name": "style0",
-        "type": null
+        "type": "talk"
       }
     ],
     "supported_features": {
diff --git a/voicevox_engine/metas/Metas.py b/voicevox_engine/metas/Metas.py
index 39c45cb64..2d79c263a 100644
--- a/voicevox_engine/metas/Metas.py
+++ b/voicevox_engine/metas/Metas.py
@@ -6,7 +6,7 @@
 # NOTE: 循環importを防ぐためにとりあえずここに書いている
 # FIXME: 他のmodelに依存せず、全modelから参照できる場所に配置する
 StyleId = NewType("StyleId", int)
-StyleType = Literal["talk", "humming", "sing_teacher"]
+StyleType = Literal["talk", "humming", "sing_teacher", "sing"]
 
 
 class SpeakerStyle(BaseModel):
@@ -16,7 +16,7 @@ class SpeakerStyle(BaseModel):
 
     name: str = Field(title="スタイル名")
     id: StyleId = Field(title="スタイルID")
-    type: Optional[StyleType] = Field(title="モデルの種類")
+    type: Optional[StyleType] = Field(default="talk", title="モデルの種類")
 
 
 class SpeakerSupportPermittedSynthesisMorphing(str, Enum):

From 98a429c74db3220f8a7ba38925aa71b37a43dd48 Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Sun, 28 Jan 2024 09:23:00 +0900
Subject: [PATCH 150/177] =?UTF-8?q?[release-0.16]=20release=200.16?=
 =?UTF-8?q?=E5=90=91=E3=81=91=E3=81=AE=E3=83=AA=E3=82=BD=E3=83=BC=E3=82=B9?=
 =?UTF-8?q?=E7=AD=89=E3=82=A2=E3=83=83=E3=83=97=E3=83=87=E3=83=BC=E3=83=88?=
 =?UTF-8?q?=20(#1031)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

release 0.16向けのリソース等アップデート
---
 .github/workflows/build-docker.yml       | 4 ++--
 .github/workflows/build.yml              | 4 ++--
 Dockerfile                               | 4 ++--
 engine_manifest_assets/update_infos.json | 5 +++++
 4 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/build-docker.yml b/.github/workflows/build-docker.yml
index 11745791d..f146f3bff 100644
--- a/.github/workflows/build-docker.yml
+++ b/.github/workflows/build-docker.yml
@@ -15,8 +15,8 @@ on:
 env:
   IMAGE_NAME: ${{ vars.DOCKERHUB_USERNAME }}/voicevox_engine
   PYTHON_VERSION: "3.11.3"
-  VOICEVOX_RESOURCE_VERSION: "0.15.0"
-  VOICEVOX_CORE_VERSION: "0.14.6"
+  VOICEVOX_RESOURCE_VERSION: "0.16.0"
+  VOICEVOX_CORE_VERSION: "0.15.0"
 
 defaults:
   run:
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index a223b3950..2084ff6b6 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -26,8 +26,8 @@ on:
 
 env:
   PYTHON_VERSION: "3.11.3"
-  VOICEVOX_RESOURCE_VERSION: "0.15.0"
-  VOICEVOX_CORE_VERSION: "0.14.6"
+  VOICEVOX_RESOURCE_VERSION: "0.16.0"
+  VOICEVOX_CORE_VERSION: "0.15.0"
 
 defaults:
   run:
diff --git a/Dockerfile b/Dockerfile
index 9b8f0c5a5..4613717dc 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -23,7 +23,7 @@ EOF
 # assert VOICEVOX_CORE_VERSION >= 0.11.0 (ONNX)
 ARG TARGETPLATFORM
 ARG USE_GPU=false
-ARG VOICEVOX_CORE_VERSION=0.14.6
+ARG VOICEVOX_CORE_VERSION=0.15.0
 
 RUN <<EOF
     set -eux
@@ -275,7 +275,7 @@ RUN <<EOF
 EOF
 
 # Download Resource
-ARG VOICEVOX_RESOURCE_VERSION=0.15.0
+ARG VOICEVOX_RESOURCE_VERSION=0.16.0
 RUN <<EOF
     set -eux
 
diff --git a/engine_manifest_assets/update_infos.json b/engine_manifest_assets/update_infos.json
index 581f41d2c..529ff9a91 100644
--- a/engine_manifest_assets/update_infos.json
+++ b/engine_manifest_assets/update_infos.json
@@ -1,4 +1,9 @@
 [
+  {
+    "version": "0.16.0",
+    "descriptions": ["ハミング用のAPIを追加"],
+    "contributors": ["Hiroshiba", "y-chan"]
+  },
   {
     "version": "0.15.1",
     "descriptions": ["ビルド成果物のディレクトリ構造を元に戻した"],

From 66837f0e437fdf163688fbe236a087ff5eb9be3f Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Wed, 31 Jan 2024 08:43:32 +0900
Subject: [PATCH 151/177] =?UTF-8?q?[release-0.16]=20style=5Ftype=E3=81=AE?=
 =?UTF-8?q?=E5=90=8D=E7=A7=B0=E3=82=92=E5=A4=89=E6=9B=B4=20(#1033)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

style_typeの名称を変更
---
 ...01\250\343\202\222\347\242\272\350\252\215.json" |  6 +++---
 voicevox_engine/metas/Metas.py                      | 13 +++++++++++--
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git "a/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json" "b/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
index c8f168a31..f2b2d7803 100644
--- "a/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
+++ "b/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
@@ -728,11 +728,11 @@
             "default": "talk",
             "enum": [
               "talk",
-              "humming",
-              "sing_teacher",
+              "singing_teacher",
+              "frame_decode",
               "sing"
             ],
-            "title": "モデルの種類",
+            "title": "モデルの種類。talk:音声合成クエリの作成と音声合成が可能。singing_teacher:歌唱音声合成用のクエリの作成が可能。frame_decode:歌唱音声合成が可能。sing:歌唱音声合成用のクエリの作成と歌唱音声合成が可能。",
             "type": "string"
           }
         },
diff --git a/voicevox_engine/metas/Metas.py b/voicevox_engine/metas/Metas.py
index 2d79c263a..eda288eb8 100644
--- a/voicevox_engine/metas/Metas.py
+++ b/voicevox_engine/metas/Metas.py
@@ -6,7 +6,7 @@
 # NOTE: 循環importを防ぐためにとりあえずここに書いている
 # FIXME: 他のmodelに依存せず、全modelから参照できる場所に配置する
 StyleId = NewType("StyleId", int)
-StyleType = Literal["talk", "humming", "sing_teacher", "sing"]
+StyleType = Literal["talk", "singing_teacher", "frame_decode", "sing"]
 
 
 class SpeakerStyle(BaseModel):
@@ -16,7 +16,16 @@ class SpeakerStyle(BaseModel):
 
     name: str = Field(title="スタイル名")
     id: StyleId = Field(title="スタイルID")
-    type: Optional[StyleType] = Field(default="talk", title="モデルの種類")
+    type: Optional[StyleType] = Field(
+        default="talk",
+        title=(
+            "モデルの種類。"
+            "talk:音声合成クエリの作成と音声合成が可能。"
+            "singing_teacher:歌唱音声合成用のクエリの作成が可能。"
+            "frame_decode:歌唱音声合成が可能。"
+            "sing:歌唱音声合成用のクエリの作成と歌唱音声合成が可能。"
+        ),
+    )
 
 
 class SpeakerSupportPermittedSynthesisMorphing(str, Enum):

From 8a7f63361e176e1cff34f61751e7818e11188d7b Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Wed, 31 Jan 2024 15:11:48 +0900
Subject: [PATCH 152/177] =?UTF-8?q?=E3=83=9E=E3=83=8B=E3=83=95=E3=82=A7?=
 =?UTF-8?q?=E3=82=B9=E3=83=88=E3=81=ABsing=E8=83=BD=E5=8A=9B=E8=BF=BD?=
 =?UTF-8?q?=E5=8A=A0=20(#1035)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

sing能力追加
---
 engine_manifest.json                                       | 5 +++++
 ...3\343\201\250\343\202\222\347\242\272\350\252\215.json" | 7 ++++++-
 voicevox_engine/engine_manifest/EngineManifest.py          | 1 +
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/engine_manifest.json b/engine_manifest.json
index abc76859a..82121f652 100644
--- a/engine_manifest.json
+++ b/engine_manifest.json
@@ -54,6 +54,11 @@
             "value": true,
             "name": "2種類のスタイルでモーフィングした音声を合成"
         },
+        "sing" : {
+            "type": "bool",
+            "value": true,
+            "name": "歌唱音声合成"
+        },
         "manage_library": {
             "type": "bool",
             "value": true,
diff --git "a/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json" "b/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
index f2b2d7803..7affe7ef7 100644
--- "a/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
+++ "b/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
@@ -859,6 +859,10 @@
             "title": "音声ライブラリのインストール・アンインストール",
             "type": "boolean"
           },
+          "sing": {
+            "title": "歌唱音声合成",
+            "type": "boolean"
+          },
           "synthesis_morphing": {
             "title": "2種類のスタイルでモーフィングした音声を合成",
             "type": "boolean"
@@ -872,7 +876,8 @@
           "adjust_intonation_scale",
           "adjust_volume_scale",
           "interrogative_upspeak",
-          "synthesis_morphing"
+          "synthesis_morphing",
+          "sing"
         ],
         "title": "SupportedFeatures",
         "type": "object"
diff --git a/voicevox_engine/engine_manifest/EngineManifest.py b/voicevox_engine/engine_manifest/EngineManifest.py
index f3a02e173..04e565942 100644
--- a/voicevox_engine/engine_manifest/EngineManifest.py
+++ b/voicevox_engine/engine_manifest/EngineManifest.py
@@ -42,6 +42,7 @@ class SupportedFeatures(BaseModel):
     adjust_volume_scale: bool = Field(title="全体の音量の調整")
     interrogative_upspeak: bool = Field(title="疑問文の自動調整")
     synthesis_morphing: bool = Field(title="2種類のスタイルでモーフィングした音声を合成")
+    sing: bool = Field(title="歌唱音声合成")
     manage_library: Optional[bool] = Field(title="音声ライブラリのインストール・アンインストール")
 
 

From e50f14abc1e5daebe0884bebe77c7ca3a5582910 Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Wed, 31 Jan 2024 15:12:57 +0900
Subject: [PATCH 153/177] =?UTF-8?q?[release-0.16]=20style=5Ftype=E3=81=AE?=
 =?UTF-8?q?=E5=90=8D=E7=A7=B0=E3=82=92=E5=A4=89=E6=9B=B4=20(#1034)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* style_typeの名称を変更

* singersとsinger_infoのAPI追加

* テストを追加
---
 run.py                                        |  70 +++++++++---
 ...\343\202\222\347\242\272\350\252\215.json" | 100 +++++++++++++++-
 test/test_metas_store.py                      | 107 ++++++++++++++++++
 voicevox_engine/metas/MetasStore.py           |  27 ++++-
 4 files changed, 287 insertions(+), 17 deletions(-)
 create mode 100644 test/test_metas_store.py

diff --git a/run.py b/run.py
index cad34b038..4d228c7f0 100644
--- a/run.py
+++ b/run.py
@@ -13,7 +13,7 @@
 from io import BytesIO, TextIOWrapper
 from pathlib import Path
 from tempfile import NamedTemporaryFile, TemporaryFile
-from typing import Annotated, Any, Optional
+from typing import Annotated, Literal, Optional
 
 import soundfile
 import uvicorn
@@ -22,7 +22,7 @@
 from fastapi.openapi.utils import get_openapi
 from fastapi.responses import JSONResponse
 from fastapi.templating import Jinja2Templates
-from pydantic import ValidationError
+from pydantic import ValidationError, parse_obj_as
 from starlette.background import BackgroundTask
 from starlette.middleware.errors import ServerErrorMiddleware
 from starlette.responses import FileResponse
@@ -35,7 +35,11 @@
 from voicevox_engine.engine_manifest.EngineManifest import EngineManifest
 from voicevox_engine.library_manager import LibraryManager
 from voicevox_engine.metas.Metas import StyleId
-from voicevox_engine.metas.MetasStore import MetasStore, construct_lookup
+from voicevox_engine.metas.MetasStore import (
+    MetasStore,
+    construct_lookup,
+    filter_speakers_and_styles,
+)
 from voicevox_engine.model import (
     AccentPhrase,
     AudioQuery,
@@ -848,22 +852,30 @@ def core_versions() -> Response:
     def speakers(
         core_version: str | None = None,
     ) -> list[Speaker]:
-        return metas_store.load_combined_metas(get_core(core_version))
+        speakers = metas_store.load_combined_metas(get_core(core_version))
+        return filter_speakers_and_styles(speakers, "speaker")
 
     @app.get("/speaker_info", response_model=SpeakerInfo, tags=["その他"])
     def speaker_info(
         speaker_uuid: str,
         core_version: str | None = None,
-    ) -> dict[str, Any]:
+    ) -> SpeakerInfo:
         """
         指定されたspeaker_uuidに関する情報をjson形式で返します。
         画像や音声はbase64エンコードされたものが返されます。
-
-        Returns
-        -------
-        ret_data: SpeakerInfo
         """
+        return _speaker_info(
+            speaker_uuid=speaker_uuid,
+            speaker_or_singer="speaker",
+            core_version=core_version,
+        )
 
+    # FIXME: この関数をどこかに切り出す
+    def _speaker_info(
+        speaker_uuid: str,
+        speaker_or_singer: Literal["speaker", "singer"],
+        core_version: str | None,
+    ) -> SpeakerInfo:
         # エンジンに含まれる話者メタ情報は、次のディレクトリ構造に従わなければならない：
         # {root_dir}/
         #   speaker_info/
@@ -888,9 +900,12 @@ def speaker_info(
         #           ...
 
         # 該当話者の検索
-        speakers = json.loads(get_core(core_version).speakers)
+        speakers = parse_obj_as(
+            list[Speaker], json.loads(get_core(core_version).speakers)
+        )
+        speakers = filter_speakers_and_styles(speakers, speaker_or_singer)
         for i in range(len(speakers)):
-            if speakers[i]["speaker_uuid"] == speaker_uuid:
+            if speakers[i].speaker_uuid == speaker_uuid:
                 speaker = speakers[i]
                 break
         else:
@@ -907,8 +922,8 @@ def speaker_info(
             portrait = b64encode_str(portrait_path.read_bytes())
             # スタイル情報の取得
             style_infos = []
-            for style in speaker["styles"]:
-                id = style["id"]
+            for style in speaker.styles:
+                id = style.id
                 # style icon
                 style_icon_path = speaker_path / "icons" / f"{id}.png"
                 icon = b64encode_str(style_icon_path.read_bytes())
@@ -941,10 +956,35 @@ def speaker_info(
             traceback.print_exc()
             raise HTTPException(status_code=500, detail="追加情報が見つかりませんでした")
 
-        ret_data = {"policy": policy, "portrait": portrait, "style_infos": style_infos}
-
+        ret_data = SpeakerInfo(
+            policy=policy,
+            portrait=portrait,
+            style_infos=style_infos,
+        )
         return ret_data
 
+    @app.get("/singers", response_model=list[Speaker], tags=["その他"])
+    def singers(
+        core_version: str | None = None,
+    ) -> list[Speaker]:  # combined metas filtered with the "singer" mode
+        singers = metas_store.load_combined_metas(get_core(core_version))
+        return filter_speakers_and_styles(singers, "singer")
+
+    @app.get("/singer_info", response_model=SpeakerInfo, tags=["その他"])
+    def singer_info(
+        speaker_uuid: str,
+        core_version: str | None = None,
+    ) -> SpeakerInfo:
+        """
+        指定されたspeaker_uuidに関する情報をjson形式で返します。
+        画像や音声はbase64エンコードされたものが返されます。
+        """
+        return _speaker_info(
+            speaker_uuid=speaker_uuid,
+            speaker_or_singer="singer",  # same lookup as speaker_info, singer mode
+            core_version=core_version,
+        )
+
     if engine_manifest_data.supported_features.manage_library:
 
         @app.get(
diff --git "a/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json" "b/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
index 7affe7ef7..616cd71b0 100644
--- "a/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
+++ "b/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
@@ -2319,9 +2319,107 @@
         ]
       }
     },
+    "/singer_info": {
+      "get": {
+        "description": "指定されたspeaker_uuidに関する情報をjson形式で返します。\n画像や音声はbase64エンコードされたものが返されます。",
+        "operationId": "singer_info_singer_info_get",
+        "parameters": [
+          {
+            "in": "query",
+            "name": "speaker_uuid",
+            "required": true,
+            "schema": {
+              "title": "Speaker Uuid",
+              "type": "string"
+            }
+          },
+          {
+            "in": "query",
+            "name": "core_version",
+            "required": false,
+            "schema": {
+              "title": "Core Version",
+              "type": "string"
+            }
+          }
+        ],
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/SpeakerInfo"
+                }
+              }
+            },
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "Singer Info",
+        "tags": [
+          "その他"
+        ]
+      }
+    },
+    "/singers": {
+      "get": {
+        "operationId": "singers_singers_get",
+        "parameters": [
+          {
+            "in": "query",
+            "name": "core_version",
+            "required": false,
+            "schema": {
+              "title": "Core Version",
+              "type": "string"
+            }
+          }
+        ],
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "items": {
+                    "$ref": "#/components/schemas/Speaker"
+                  },
+                  "title": "Response Singers Singers Get",
+                  "type": "array"
+                }
+              }
+            },
+            "description": "Successful Response"
+          },
+          "422": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            },
+            "description": "Validation Error"
+          }
+        },
+        "summary": "Singers",
+        "tags": [
+          "その他"
+        ]
+      }
+    },
     "/speaker_info": {
       "get": {
-        "description": "指定されたspeaker_uuidに関する情報をjson形式で返します。\n画像や音声はbase64エンコードされたものが返されます。\n\nReturns\n-------\nret_data: SpeakerInfo",
+        "description": "指定されたspeaker_uuidに関する情報をjson形式で返します。\n画像や音声はbase64エンコードされたものが返されます。",
         "operationId": "speaker_info_speaker_info_get",
         "parameters": [
           {
diff --git a/test/test_metas_store.py b/test/test_metas_store.py
new file mode 100644
index 000000000..cf354928a
--- /dev/null
+++ b/test/test_metas_store.py
@@ -0,0 +1,107 @@
+import uuid
+from unittest import TestCase
+
+from voicevox_engine.metas.Metas import Speaker, SpeakerStyle, StyleType
+from voicevox_engine.metas.MetasStore import filter_speakers_and_styles
+
+
+def _gen_speaker(style_types: list[StyleType]) -> Speaker:
+    return Speaker(
+        speaker_uuid=str(uuid.uuid4()),  # random UUID so each speaker is distinct
+        name="",
+        styles=[
+            SpeakerStyle(
+                name="",
+                id=0,  # every generated style shares id 0
+                type=style_type,
+            )
+            for style_type in style_types  # one style per requested type
+        ],
+    )
+
+
+def _equal_speakers(a: list[Speaker], b: list[Speaker]) -> bool:
+    if len(a) != len(b):  # lists of different length never match
+        return False
+    for i in range(len(a)):
+        if a[i].speaker_uuid != b[i].speaker_uuid:  # compared by UUID, in order
+            return False
+    return True
+
+
+class TestMetasStore(TestCase):
+    def test_filter_speakers_and_styles_with_speaker(self):
+        # Inputs
+        speaker_talk_only = _gen_speaker(["talk"])
+        speaker_singing_teacher_only = _gen_speaker(["singing_teacher"])
+        speaker_frame_decode_only = _gen_speaker(["frame_decode"])
+        speaker_sing_only = _gen_speaker(["sing"])
+        speaker_allstyle = _gen_speaker(
+            ["talk", "singing_teacher", "frame_decode", "sing"]
+        )
+
+        # Outputs
+        result = filter_speakers_and_styles(
+            [
+                speaker_talk_only,
+                speaker_singing_teacher_only,
+                speaker_frame_decode_only,
+                speaker_sing_only,
+                speaker_allstyle,
+            ],
+            "speaker",
+        )
+
+        # Tests
+        self.assertEqual(len(result), 2)  # talk-only and all-style speakers
+
+        # Only the speakers that have a talk style remain, in input order
+        self.assertTrue(_equal_speakers(result, [speaker_talk_only, speaker_allstyle]))
+
+        # The styles themselves have been filtered down to "talk"
+        for speaker in result:
+            for style in speaker.styles:
+                self.assertEqual(style.type, "talk")
+
+    def test_filter_speakers_and_styles_with_singer(self):
+        # Inputs
+        speaker_talk_only = _gen_speaker(["talk"])
+        speaker_singing_teacher_only = _gen_speaker(["singing_teacher"])
+        speaker_frame_decode_only = _gen_speaker(["frame_decode"])
+        speaker_sing_only = _gen_speaker(["sing"])
+        speaker_allstyle = _gen_speaker(
+            ["talk", "singing_teacher", "frame_decode", "sing"]
+        )
+
+        # Outputs
+        result = filter_speakers_and_styles(
+            [
+                speaker_talk_only,
+                speaker_singing_teacher_only,
+                speaker_frame_decode_only,
+                speaker_sing_only,
+                speaker_allstyle,
+            ],
+            "singer",
+        )
+
+        # Tests
+        self.assertEqual(len(result), 4)  # everyone except the talk-only speaker
+
+        # Only the speakers that have a song-type style remain, in input order
+        self.assertTrue(
+            _equal_speakers(
+                result,
+                [
+                    speaker_singing_teacher_only,
+                    speaker_frame_decode_only,
+                    speaker_sing_only,
+                    speaker_allstyle,
+                ],
+            )
+        )
+
+        # The styles themselves have been filtered down to the song types
+        for speaker in result:
+            for style in speaker.styles:
+                self.assertIn(style.type, ["singing_teacher", "frame_decode", "sing"])
diff --git a/voicevox_engine/metas/MetasStore.py b/voicevox_engine/metas/MetasStore.py
index 2608ff4ff..3bb0bbab1 100644
--- a/voicevox_engine/metas/MetasStore.py
+++ b/voicevox_engine/metas/MetasStore.py
@@ -1,6 +1,7 @@
 import json
+from copy import deepcopy
 from pathlib import Path
-from typing import TYPE_CHECKING, Dict, List, Tuple
+from typing import TYPE_CHECKING, Dict, List, Literal, Tuple
 
 from voicevox_engine.metas.Metas import (
     CoreSpeaker,
@@ -8,6 +9,7 @@
     Speaker,
     SpeakerStyle,
     StyleId,
+    StyleType,
 )
 
 if TYPE_CHECKING:
@@ -79,3 +81,26 @@ def construct_lookup(
         for style in speaker.styles:
             lookup_table[style.id] = (speaker, style)
     return lookup_table
+
+
+def filter_speakers_and_styles(
+    speakers: list[Speaker],
+    speaker_or_singer: Literal["speaker", "singer"],
+) -> list[Speaker]:
+    """
+    Filter speakers and their styles.
+    For "speaker" keep only talk styles; for "singer" keep only song styles.
+    Speakers left with zero styles are excluded.
+    """
+    style_types: list[StyleType]  # NOTE(review): unbound if no branch below matches
+    if speaker_or_singer == "speaker":
+        style_types = ["talk"]
+    elif speaker_or_singer == "singer":
+        style_types = ["singing_teacher", "frame_decode", "sing"]
+
+    speakers = deepcopy(speakers)  # copy so the caller's objects are not mutated
+    for speaker in speakers:
+        speaker.styles = [
+            style for style in speaker.styles if style.type in style_types
+        ]
+    return [speaker for speaker in speakers if len(speaker.styles) > 0]

From 0f06aeb9d6e1efb8683ebafcbe7a1d82ca09cec3 Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Wed, 31 Jan 2024 15:43:14 +0900
Subject: [PATCH 154/177] to 0.16.0 (#1036)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* to-0.16.0

* スタイル追加も言及
---
 engine_manifest_assets/update_infos.json | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/engine_manifest_assets/update_infos.json b/engine_manifest_assets/update_infos.json
index 529ff9a91..466f5de04 100644
--- a/engine_manifest_assets/update_infos.json
+++ b/engine_manifest_assets/update_infos.json
@@ -1,7 +1,11 @@
 [
   {
     "version": "0.16.0",
-    "descriptions": ["ハミング用のAPIを追加"],
+    "descriptions": [
+      "ソングAPIを追加",
+      "キャラクター「四国めたん」「ずんだもん」「春日部つむぎ」「雨晴はう」「波音リツ」のハミングを追加",
+      "キャラクター「波音リツ」のソングを追加"
+    ],
     "contributors": ["Hiroshiba", "y-chan"]
   },
   {

From 5e9931c4a04617c006fbb085a04e6e7bd2502b7c Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Wed, 31 Jan 2024 16:05:32 +0900
Subject: [PATCH 155/177] =?UTF-8?q?=E8=BF=BD=E5=8A=A0feature=E3=81=AEsing?=
 =?UTF-8?q?=E3=82=92optional=E3=81=AB=20(#1037)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

追加feature sing をoptionalに
---
 ...1\223\343\201\250\343\202\222\347\242\272\350\252\215.json" | 3 +--
 voicevox_engine/engine_manifest/EngineManifest.py              | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git "a/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json" "b/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
index 616cd71b0..6a173a16e 100644
--- "a/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
+++ "b/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
@@ -876,8 +876,7 @@
           "adjust_intonation_scale",
           "adjust_volume_scale",
           "interrogative_upspeak",
-          "synthesis_morphing",
-          "sing"
+          "synthesis_morphing"
         ],
         "title": "SupportedFeatures",
         "type": "object"
diff --git a/voicevox_engine/engine_manifest/EngineManifest.py b/voicevox_engine/engine_manifest/EngineManifest.py
index 04e565942..02fd87981 100644
--- a/voicevox_engine/engine_manifest/EngineManifest.py
+++ b/voicevox_engine/engine_manifest/EngineManifest.py
@@ -42,7 +42,7 @@ class SupportedFeatures(BaseModel):
     adjust_volume_scale: bool = Field(title="全体の音量の調整")
     interrogative_upspeak: bool = Field(title="疑問文の自動調整")
     synthesis_morphing: bool = Field(title="2種類のスタイルでモーフィングした音声を合成")
-    sing: bool = Field(title="歌唱音声合成")
+    sing: Optional[bool] = Field(title="歌唱音声合成")
     manage_library: Optional[bool] = Field(title="音声ライブラリのインストール・アンインストール")
 
 

From 86fff41548d6e2f749e1053df3750bcb7ac68483 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Fri, 2 Feb 2024 08:12:53 +0900
Subject: [PATCH 156/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`.phoneme`=20?=
 =?UTF-8?q?=E3=83=97=E3=83=A9=E3=82=A4=E3=83=99=E3=83=BC=E3=83=88=E5=8C=96?=
 =?UTF-8?q?=20(#1002)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

refactor: `.phoneme` プライベート化
---
 test/tts_pipeline/test_acoustic_feature_extractor.py      | 2 +-
 test/tts_pipeline/test_tts_engine.py                      | 2 +-
 .../tts_pipeline/acoustic_feature_extractor.py            | 8 ++++----
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/test/tts_pipeline/test_acoustic_feature_extractor.py b/test/tts_pipeline/test_acoustic_feature_extractor.py
index 4bf76267a..679e981c1 100644
--- a/test/tts_pipeline/test_acoustic_feature_extractor.py
+++ b/test/tts_pipeline/test_acoustic_feature_extractor.py
@@ -34,7 +34,7 @@ def test_const(self):
 
     def test_convert(self):
         sil_phoneme = Phoneme("sil")
-        self.assertEqual(sil_phoneme.phoneme, "pau")
+        self.assertEqual(sil_phoneme._phoneme, "pau")
 
     def test_phoneme_id(self):
         ojt_str_hello_hiho = " ".join([str(p.id) for p in self.ojt_hello_hiho])
diff --git a/test/tts_pipeline/test_tts_engine.py b/test/tts_pipeline/test_tts_engine.py
index 73598b5fe..b29fa0f65 100644
--- a/test/tts_pipeline/test_tts_engine.py
+++ b/test/tts_pipeline/test_tts_engine.py
@@ -132,7 +132,7 @@ def test_to_flatten_phonemes():
     true_phonemes = ["pau", "h", "i", "pau"]
 
     # Outputs
-    phonemes = list(map(lambda p: p.phoneme, to_flatten_phonemes(moras)))
+    phonemes = list(map(lambda p: p._phoneme, to_flatten_phonemes(moras)))
 
     assert true_phonemes == phonemes
 
diff --git a/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py b/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
index 3d0bfa6ad..2d1642532 100644
--- a/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
+++ b/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
@@ -115,14 +115,14 @@ def __init__(self, phoneme: str):
         if "sil" in phoneme:
             phoneme = "pau"
 
-        self.phoneme = phoneme
+        self._phoneme = phoneme
         # TODO: `phoneme` で受け入れ可能な文字列を型で保証
         # self.phoneme: Vowel | Consonant = phoneme
 
     @property
     def id(self) -> int:
         """音素ID (音素リスト内でのindex) を取得する"""
-        return self._PHONEME_LIST.index(self.phoneme)
+        return self._PHONEME_LIST.index(self._phoneme)
 
     @property
     def onehot(self) -> NDArray[np.float32]:
@@ -133,8 +133,8 @@ def onehot(self) -> NDArray[np.float32]:
 
     def is_mora_tail(self) -> bool:
         """この音素はモーラ末尾音素（母音・撥音・促音・無音）である"""
-        return self.phoneme in MORA_TAIL_PHONEMES
+        return self._phoneme in MORA_TAIL_PHONEMES
 
     def is_unvoiced_mora_tail(self) -> bool:
         """この音素は無声のモーラ末尾音素（無声母音・促音・無音）である"""
-        return self.phoneme in UNVOICED_MORA_TAIL_PHONEMES
+        return self._phoneme in UNVOICED_MORA_TAIL_PHONEMES

From 80fa8ecd58e359e354cd96cdd8e6ab1afc6f266c Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Fri, 2 Feb 2024 08:30:49 +0900
Subject: [PATCH 157/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E3=83=A6?=
 =?UTF-8?q?=E3=83=BC=E3=82=B6=E3=83=BC=E8=BE=9E=E6=9B=B8=E3=83=86=E3=82=B9?=
 =?UTF-8?q?=E3=83=88=E6=A7=8B=E9=80=A0=E5=8C=96=20(#1009)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: ユーザー辞書テスト構造化

* user_dictionaly→user_dict

---------

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 test/{ => user_dict}/test_user_dict.py       | 0
 test/{ => user_dict}/test_user_dict_model.py | 0
 test/{ => user_dict}/test_word_types.py      | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 rename test/{ => user_dict}/test_user_dict.py (100%)
 rename test/{ => user_dict}/test_user_dict_model.py (100%)
 rename test/{ => user_dict}/test_word_types.py (100%)

diff --git a/test/test_user_dict.py b/test/user_dict/test_user_dict.py
similarity index 100%
rename from test/test_user_dict.py
rename to test/user_dict/test_user_dict.py
diff --git a/test/test_user_dict_model.py b/test/user_dict/test_user_dict_model.py
similarity index 100%
rename from test/test_user_dict_model.py
rename to test/user_dict/test_user_dict_model.py
diff --git a/test/test_word_types.py b/test/user_dict/test_word_types.py
similarity index 100%
rename from test/test_word_types.py
rename to test/user_dict/test_word_types.py

From 0094d9c62826e34e3c70f3f164b37ac2b1ef2a18 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Fri, 2 Feb 2024 08:35:38 +0900
Subject: [PATCH 158/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E3=82=BB?=
 =?UTF-8?q?=E3=83=83=E3=83=86=E3=82=A3=E3=83=B3=E3=82=B0=E3=83=86=E3=82=B9?=
 =?UTF-8?q?=E3=83=88=E6=A7=8B=E9=80=A0=E5=8C=96=20(#1011)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

refactor: セッティングテスト構造化
---
 test/{ => setting}/setting-test-load-1.yaml | 0
 test/{ => setting}/setting-test-load-2.yaml | 0
 test/{ => setting}/setting-test-load-3.yaml | 0
 test/{ => setting}/test_setting.py          | 6 +++---
 4 files changed, 3 insertions(+), 3 deletions(-)
 rename test/{ => setting}/setting-test-load-1.yaml (100%)
 rename test/{ => setting}/setting-test-load-2.yaml (100%)
 rename test/{ => setting}/setting-test-load-3.yaml (100%)
 rename test/{ => setting}/test_setting.py (90%)

diff --git a/test/setting-test-load-1.yaml b/test/setting/setting-test-load-1.yaml
similarity index 100%
rename from test/setting-test-load-1.yaml
rename to test/setting/setting-test-load-1.yaml
diff --git a/test/setting-test-load-2.yaml b/test/setting/setting-test-load-2.yaml
similarity index 100%
rename from test/setting-test-load-2.yaml
rename to test/setting/setting-test-load-2.yaml
diff --git a/test/setting-test-load-3.yaml b/test/setting/setting-test-load-3.yaml
similarity index 100%
rename from test/setting-test-load-3.yaml
rename to test/setting/setting-test-load-3.yaml
diff --git a/test/test_setting.py b/test/setting/test_setting.py
similarity index 90%
rename from test/test_setting.py
rename to test/setting/test_setting.py
index 494e3095e..25bb9f856 100644
--- a/test/test_setting.py
+++ b/test/setting/test_setting.py
@@ -21,7 +21,7 @@ def test_loading_1(self):
 
     def test_loading_2(self):
         setting_loader = SettingLoader(
-            setting_file_path=Path("test/setting-test-load-1.yaml")
+            setting_file_path=Path("test/setting/setting-test-load-1.yaml")
         )
         settings = setting_loader.load_setting_file()
 
@@ -32,7 +32,7 @@ def test_loading_2(self):
 
     def test_loading_3(self):
         setting_loader = SettingLoader(
-            setting_file_path=Path("test/setting-test-load-2.yaml")
+            setting_file_path=Path("test/setting/setting-test-load-2.yaml")
         )
         settings = setting_loader.load_setting_file()
 
@@ -43,7 +43,7 @@ def test_loading_3(self):
 
     def test_loading_4(self):
         setting_loader = SettingLoader(
-            setting_file_path=Path("test/setting-test-load-3.yaml")
+            setting_file_path=Path("test/setting/setting-test-load-3.yaml")
         )
         settings = setting_loader.load_setting_file()
 

From 2a6273e6305e15e306991fd719090273780cde78 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Fri, 2 Feb 2024 08:37:18 +0900
Subject: [PATCH 159/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E3=83=97?=
 =?UTF-8?q?=E3=83=AA=E3=82=BB=E3=83=83=E3=83=88=E3=83=86=E3=82=B9=E3=83=88?=
 =?UTF-8?q?=E6=A7=8B=E9=80=A0=E5=8C=96=20(#1010)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: プリセットテストの構造化

* path_*→*_path

---------

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 test/{ => preset}/presets-test-1.yaml |  0
 test/{ => preset}/presets-test-2.yaml |  0
 test/{ => preset}/presets-test-3.yaml |  0
 test/{ => preset}/presets-test-4.yaml |  0
 test/{ => preset}/test_preset.py      | 41 +++++++++++++++------------
 5 files changed, 23 insertions(+), 18 deletions(-)
 rename test/{ => preset}/presets-test-1.yaml (100%)
 rename test/{ => preset}/presets-test-2.yaml (100%)
 rename test/{ => preset}/presets-test-3.yaml (100%)
 rename test/{ => preset}/presets-test-4.yaml (100%)
 rename test/{ => preset}/test_preset.py (88%)

diff --git a/test/presets-test-1.yaml b/test/preset/presets-test-1.yaml
similarity index 100%
rename from test/presets-test-1.yaml
rename to test/preset/presets-test-1.yaml
diff --git a/test/presets-test-2.yaml b/test/preset/presets-test-2.yaml
similarity index 100%
rename from test/presets-test-2.yaml
rename to test/preset/presets-test-2.yaml
diff --git a/test/presets-test-3.yaml b/test/preset/presets-test-3.yaml
similarity index 100%
rename from test/presets-test-3.yaml
rename to test/preset/presets-test-3.yaml
diff --git a/test/presets-test-4.yaml b/test/preset/presets-test-4.yaml
similarity index 100%
rename from test/presets-test-4.yaml
rename to test/preset/presets-test-4.yaml
diff --git a/test/test_preset.py b/test/preset/test_preset.py
similarity index 88%
rename from test/test_preset.py
rename to test/preset/test_preset.py
index bbbc608b8..0922423d2 100644
--- a/test/test_preset.py
+++ b/test/preset/test_preset.py
@@ -6,6 +6,11 @@
 
 from voicevox_engine.preset import Preset, PresetError, PresetManager
 
+presets_test_1_yaml_path = Path("test/preset/presets-test-1.yaml")
+presets_test_2_yaml_path = Path("test/preset/presets-test-2.yaml")
+presets_test_3_yaml_path = Path("test/preset/presets-test-3.yaml")
+presets_test_4_yaml_path = Path("test/preset/presets-test-4.yaml")
+
 
 class TestPresetManager(TestCase):
     def setUp(self):
@@ -16,29 +21,29 @@ def tearDown(self):
         self.tmp_dir.cleanup()
 
     def test_validation(self):
-        preset_manager = PresetManager(preset_path=Path("test/presets-test-1.yaml"))
+        preset_manager = PresetManager(preset_path=presets_test_1_yaml_path)
         presets = preset_manager.load_presets()
         self.assertFalse(presets is None)
 
     def test_validation_same(self):
-        preset_manager = PresetManager(preset_path=Path("test/presets-test-1.yaml"))
+        preset_manager = PresetManager(preset_path=presets_test_1_yaml_path)
         presets = preset_manager.load_presets()
         presets2 = preset_manager.load_presets()
         self.assertFalse(presets is None)
         self.assertEqual(presets, presets2)
 
     def test_validation_2(self):
-        preset_manager = PresetManager(preset_path=Path("test/presets-test-2.yaml"))
+        preset_manager = PresetManager(preset_path=presets_test_2_yaml_path)
         with self.assertRaises(PresetError, msg="プリセットの設定ファイルにミスがあります"):
             preset_manager.load_presets()
 
     def test_preset_id(self):
-        preset_manager = PresetManager(preset_path=Path("test/presets-test-3.yaml"))
+        preset_manager = PresetManager(preset_path=presets_test_3_yaml_path)
         with self.assertRaises(PresetError, msg="プリセットのidに重複があります"):
             preset_manager.load_presets()
 
     def test_empty_file(self):
-        preset_manager = PresetManager(preset_path=Path("test/presets-test-4.yaml"))
+        preset_manager = PresetManager(preset_path=presets_test_4_yaml_path)
         with self.assertRaises(PresetError, msg="プリセットの設定ファイルが空の内容です"):
             preset_manager.load_presets()
 
@@ -49,7 +54,7 @@ def test_not_exist_file(self):
 
     def test_add_preset(self):
         temp_path = self.tmp_dir_path / "presets-test-temp.yaml"
-        copyfile(Path("test/presets-test-1.yaml"), temp_path)
+        copyfile(presets_test_1_yaml_path, temp_path)
         preset_manager = PresetManager(preset_path=temp_path)
         preset = Preset(
             **{
@@ -74,7 +79,7 @@ def test_add_preset(self):
         remove(temp_path)
 
     def test_add_preset_load_failure(self):
-        preset_manager = PresetManager(preset_path=Path("test/presets-test-2.yaml"))
+        preset_manager = PresetManager(preset_path=presets_test_2_yaml_path)
         with self.assertRaises(PresetError, msg="プリセットの設定ファイルにミスがあります"):
             preset_manager.add_preset(
                 Preset(
@@ -95,7 +100,7 @@ def test_add_preset_load_failure(self):
 
     def test_add_preset_conflict_id(self):
         temp_path = self.tmp_dir_path / "presets-test-temp.yaml"
-        copyfile(Path("test/presets-test-1.yaml"), temp_path)
+        copyfile(presets_test_1_yaml_path, temp_path)
         preset_manager = PresetManager(preset_path=temp_path)
         preset = Preset(
             **{
@@ -121,7 +126,7 @@ def test_add_preset_conflict_id(self):
 
     def test_add_preset_conflict_id2(self):
         temp_path = self.tmp_dir_path / "presets-test-temp.yaml"
-        copyfile(Path("test/presets-test-1.yaml"), temp_path)
+        copyfile(presets_test_1_yaml_path, temp_path)
         preset_manager = PresetManager(preset_path=temp_path)
         preset = Preset(
             **{
@@ -147,7 +152,7 @@ def test_add_preset_conflict_id2(self):
 
     def test_add_preset_write_failure(self):
         temp_path = self.tmp_dir_path / "presets-test-temp.yaml"
-        copyfile(Path("test/presets-test-1.yaml"), temp_path)
+        copyfile(presets_test_1_yaml_path, temp_path)
         preset_manager = PresetManager(preset_path=temp_path)
         preset = Preset(
             **{
@@ -173,7 +178,7 @@ def test_add_preset_write_failure(self):
 
     def test_update_preset(self):
         temp_path = self.tmp_dir_path / "presets-test-temp.yaml"
-        copyfile(Path("test/presets-test-1.yaml"), temp_path)
+        copyfile(presets_test_1_yaml_path, temp_path)
         preset_manager = PresetManager(preset_path=temp_path)
         preset = Preset(
             **{
@@ -198,7 +203,7 @@ def test_update_preset(self):
         remove(temp_path)
 
     def test_update_preset_load_failure(self):
-        preset_manager = PresetManager(preset_path=Path("test/presets-test-2.yaml"))
+        preset_manager = PresetManager(preset_path=presets_test_2_yaml_path)
         with self.assertRaises(PresetError, msg="プリセットの設定ファイルにミスがあります"):
             preset_manager.update_preset(
                 Preset(
@@ -219,7 +224,7 @@ def test_update_preset_load_failure(self):
 
     def test_update_preset_not_found(self):
         temp_path = self.tmp_dir_path / "presets-test-temp.yaml"
-        copyfile(Path("test/presets-test-1.yaml"), temp_path)
+        copyfile(presets_test_1_yaml_path, temp_path)
         preset_manager = PresetManager(preset_path=temp_path)
         preset = Preset(
             **{
@@ -242,7 +247,7 @@ def test_update_preset_not_found(self):
 
     def test_update_preset_write_failure(self):
         temp_path = self.tmp_dir_path / "presets-test-temp.yaml"
-        copyfile(Path("test/presets-test-1.yaml"), temp_path)
+        copyfile(presets_test_1_yaml_path, temp_path)
         preset_manager = PresetManager(preset_path=temp_path)
         preset = Preset(
             **{
@@ -269,7 +274,7 @@ def test_update_preset_write_failure(self):
 
     def test_delete_preset(self):
         temp_path = self.tmp_dir_path / "presets-test-temp.yaml"
-        copyfile(Path("test/presets-test-1.yaml"), temp_path)
+        copyfile(presets_test_1_yaml_path, temp_path)
         preset_manager = PresetManager(preset_path=temp_path)
         id = preset_manager.delete_preset(1)
         self.assertEqual(id, 1)
@@ -277,13 +282,13 @@ def test_delete_preset(self):
         remove(temp_path)
 
     def test_delete_preset_load_failure(self):
-        preset_manager = PresetManager(preset_path=Path("test/presets-test-2.yaml"))
+        preset_manager = PresetManager(preset_path=presets_test_2_yaml_path)
         with self.assertRaises(PresetError, msg="プリセットの設定ファイルにミスがあります"):
             preset_manager.delete_preset(10)
 
     def test_delete_preset_not_found(self):
         temp_path = self.tmp_dir_path / "presets-test-temp.yaml"
-        copyfile(Path("test/presets-test-1.yaml"), temp_path)
+        copyfile(presets_test_1_yaml_path, temp_path)
         preset_manager = PresetManager(preset_path=temp_path)
         with self.assertRaises(PresetError, msg="削除対象のプリセットが存在しません"):
             preset_manager.delete_preset(10)
@@ -292,7 +297,7 @@ def test_delete_preset_not_found(self):
 
     def test_delete_preset_write_failure(self):
         temp_path = self.tmp_dir_path / "presets-test-temp.yaml"
-        copyfile(Path("test/presets-test-1.yaml"), temp_path)
+        copyfile(presets_test_1_yaml_path, temp_path)
         preset_manager = PresetManager(preset_path=temp_path)
         preset_manager.load_presets()
         preset_manager.load_presets = lambda: []  # type:ignore[method-assign]

From af8cde19b26e8dc27d470da504ae64b1fcef0bcc Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sat, 3 Feb 2024 07:51:35 +0900
Subject: [PATCH 160/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`core`=E3=83=A2?=
 =?UTF-8?q?=E3=82=B8=E3=83=A5=E3=83=BC=E3=83=AB=E7=A7=BB=E6=A4=8D=20(#1039?=
 =?UTF-8?q?)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

refactor: `core`モジュールへの移植
---
 run.py                                         | 4 ++--
 test/e2e/conftest.py                           | 2 +-
 voicevox_engine/cancellable_engine.py          | 2 +-
 voicevox_engine/{ => core}/core_adapter.py     | 2 +-
 voicevox_engine/{ => core}/core_initializer.py | 6 +++---
 voicevox_engine/{ => core}/core_wrapper.py     | 0
 voicevox_engine/dev/core/mock.py               | 2 +-
 voicevox_engine/metas/MetasStore.py            | 2 +-
 voicevox_engine/morphing.py                    | 2 +-
 voicevox_engine/tts_pipeline/tts_engine.py     | 4 ++--
 10 files changed, 13 insertions(+), 13 deletions(-)
 rename voicevox_engine/{ => core}/core_adapter.py (99%)
 rename voicevox_engine/{ => core}/core_initializer.py (97%)
 rename voicevox_engine/{ => core}/core_wrapper.py (100%)

diff --git a/run.py b/run.py
index 4d228c7f0..4e408de13 100644
--- a/run.py
+++ b/run.py
@@ -29,8 +29,8 @@
 
 from voicevox_engine import __version__
 from voicevox_engine.cancellable_engine import CancellableEngine
-from voicevox_engine.core_adapter import CoreAdapter
-from voicevox_engine.core_initializer import initialize_cores
+from voicevox_engine.core.core_adapter import CoreAdapter
+from voicevox_engine.core.core_initializer import initialize_cores
 from voicevox_engine.engine_manifest import EngineManifestLoader
 from voicevox_engine.engine_manifest.EngineManifest import EngineManifest
 from voicevox_engine.library_manager import LibraryManager
diff --git a/test/e2e/conftest.py b/test/e2e/conftest.py
index d0de01527..aff8d5977 100644
--- a/test/e2e/conftest.py
+++ b/test/e2e/conftest.py
@@ -5,7 +5,7 @@
 from fastapi.testclient import TestClient
 from run import generate_app
 
-from voicevox_engine.core_initializer import initialize_cores
+from voicevox_engine.core.core_initializer import initialize_cores
 from voicevox_engine.preset import PresetManager
 from voicevox_engine.setting import SettingLoader
 from voicevox_engine.tts_pipeline.tts_engine import make_tts_engines_from_cores
diff --git a/voicevox_engine/cancellable_engine.py b/voicevox_engine/cancellable_engine.py
index f3f8200ef..c94fd282c 100644
--- a/voicevox_engine/cancellable_engine.py
+++ b/voicevox_engine/cancellable_engine.py
@@ -16,7 +16,7 @@
 # FIXME: remove FastAPI dependency
 from fastapi import HTTPException, Request
 
-from .core_initializer import initialize_cores
+from .core.core_initializer import initialize_cores
 from .metas.Metas import StyleId
 from .model import AudioQuery
 from .tts_pipeline.tts_engine import make_tts_engines_from_cores
diff --git a/voicevox_engine/core_adapter.py b/voicevox_engine/core/core_adapter.py
similarity index 99%
rename from voicevox_engine/core_adapter.py
rename to voicevox_engine/core/core_adapter.py
index 5ff5d61f2..72f3068ad 100644
--- a/voicevox_engine/core_adapter.py
+++ b/voicevox_engine/core/core_adapter.py
@@ -3,8 +3,8 @@
 import numpy as np
 from numpy.typing import NDArray
 
+from ..metas.Metas import StyleId
 from .core_wrapper import CoreWrapper, OldCoreError
-from .metas.Metas import StyleId
 
 
 class CoreAdapter:
diff --git a/voicevox_engine/core_initializer.py b/voicevox_engine/core/core_initializer.py
similarity index 97%
rename from voicevox_engine/core_initializer.py
rename to voicevox_engine/core/core_initializer.py
index 4832ee805..3a5b1bf9a 100644
--- a/voicevox_engine/core_initializer.py
+++ b/voicevox_engine/core/core_initializer.py
@@ -3,9 +3,9 @@
 from pathlib import Path
 from typing import List, Optional
 
+from ..tts_pipeline.tts_engine import CoreAdapter
+from ..utility import engine_root, get_save_dir
 from .core_wrapper import CoreWrapper, load_runtime_lib
-from .tts_pipeline.tts_engine import CoreAdapter
-from .utility import engine_root, get_save_dir
 
 MOCK_VER = "0.0.0"
 
@@ -129,7 +129,7 @@ def load_core_library(core_dir: Path, suppress_error: bool = False) -> None:
 
     else:
         # モック追加
-        from .dev.core import MockCoreWrapper
+        from ..dev.core import MockCoreWrapper
 
         if MOCK_VER not in cores:
             print("Info: Loading mock.")
diff --git a/voicevox_engine/core_wrapper.py b/voicevox_engine/core/core_wrapper.py
similarity index 100%
rename from voicevox_engine/core_wrapper.py
rename to voicevox_engine/core/core_wrapper.py
diff --git a/voicevox_engine/dev/core/mock.py b/voicevox_engine/dev/core/mock.py
index aa468234b..d0862d80c 100644
--- a/voicevox_engine/dev/core/mock.py
+++ b/voicevox_engine/dev/core/mock.py
@@ -4,7 +4,7 @@
 import numpy as np
 from numpy.typing import NDArray
 
-from ...core_wrapper import CoreWrapper
+from ...core.core_wrapper import CoreWrapper
 
 
 class MockCoreWrapper(CoreWrapper):
diff --git a/voicevox_engine/metas/MetasStore.py b/voicevox_engine/metas/MetasStore.py
index 3bb0bbab1..bcea6c1ce 100644
--- a/voicevox_engine/metas/MetasStore.py
+++ b/voicevox_engine/metas/MetasStore.py
@@ -13,7 +13,7 @@
 )
 
 if TYPE_CHECKING:
-    from voicevox_engine.core_adapter import CoreAdapter
+    from voicevox_engine.core.core_adapter import CoreAdapter
 
 
 class MetasStore:
diff --git a/voicevox_engine/morphing.py b/voicevox_engine/morphing.py
index 957d2d279..5ebef82bd 100644
--- a/voicevox_engine/morphing.py
+++ b/voicevox_engine/morphing.py
@@ -12,7 +12,7 @@
 from numpy.typing import NDArray
 from soxr import resample
 
-from .core_adapter import CoreAdapter
+from .core.core_adapter import CoreAdapter
 from .metas.Metas import (
     Speaker,
     SpeakerStyle,
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index 45faea35e..d239d5c3e 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -6,8 +6,8 @@
 from numpy.typing import NDArray
 from soxr import resample
 
-from ..core_adapter import CoreAdapter
-from ..core_wrapper import CoreWrapper
+from ..core.core_adapter import CoreAdapter
+from ..core.core_wrapper import CoreWrapper
 from ..metas.Metas import StyleId
 from ..model import AccentPhrase, AudioQuery, FrameAudioQuery, FramePhoneme, Mora, Score
 from .acoustic_feature_extractor import Phoneme

From 10286326c756047e49742848b98c955f47968234 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sun, 4 Feb 2024 09:28:48 +0900
Subject: [PATCH 161/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`=5F=5Finit=5F=5F?=
 =?UTF-8?q?.py`=20=E3=82=A4=E3=83=B3=E3=83=9D=E3=83=BC=E3=83=88/=E3=82=A8?=
 =?UTF-8?q?=E3=82=AF=E3=82=B9=E3=83=9D=E3=83=BC=E3=83=88=E5=BB=83=E6=AD=A2?=
 =?UTF-8?q?=20(#1040)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

refactor: __init__.py インポート/エクスポート廃止
---
 build_util/make_docs.py                     |  8 ++++----
 run.py                                      | 22 +++++++++------------
 test/e2e/conftest.py                        |  4 ++--
 test/preset/test_preset.py                  |  4 +++-
 test/setting/test_setting.py                |  3 ++-
 test/test_connect_base64_waves.py           |  5 ++++-
 test/test_core_version_utility.py           |  5 ++++-
 test/test_mock_tts_engine.py                |  2 +-
 voicevox_engine/cancellable_engine.py       |  2 +-
 voicevox_engine/core/__init__.py            |  0
 voicevox_engine/core/core_initializer.py    |  4 ++--
 voicevox_engine/dev/__init__.py             |  0
 voicevox_engine/dev/core/__init__.py        |  3 ---
 voicevox_engine/dev/tts_engine/__init__.py  |  3 ---
 voicevox_engine/engine_manifest/__init__.py |  7 -------
 voicevox_engine/metas/__init__.py           |  6 ------
 voicevox_engine/preset/__init__.py          |  9 ---------
 voicevox_engine/setting/SettingLoader.py    |  2 +-
 voicevox_engine/setting/__init__.py         |  9 ---------
 voicevox_engine/tts_pipeline/__init__.py    |  0
 voicevox_engine/tts_pipeline/tts_engine.py  |  2 +-
 voicevox_engine/user_dict.py                |  3 ++-
 voicevox_engine/utility/__init__.py         | 20 -------------------
 23 files changed, 36 insertions(+), 87 deletions(-)
 create mode 100644 voicevox_engine/core/__init__.py
 create mode 100644 voicevox_engine/dev/__init__.py
 create mode 100644 voicevox_engine/tts_pipeline/__init__.py

diff --git a/build_util/make_docs.py b/build_util/make_docs.py
index bde98cead..e36295bf6 100644
--- a/build_util/make_docs.py
+++ b/build_util/make_docs.py
@@ -1,12 +1,12 @@
 import json
 from pathlib import Path
 
-from voicevox_engine.dev.core import MockCoreWrapper
+from voicevox_engine.dev.core.mock import MockCoreWrapper
 from voicevox_engine.dev.tts_engine.mock import MockTTSEngine
-from voicevox_engine.preset import PresetManager
-from voicevox_engine.setting import USER_SETTING_PATH, SettingLoader
+from voicevox_engine.preset.PresetManager import PresetManager
+from voicevox_engine.setting.SettingLoader import USER_SETTING_PATH, SettingLoader
 from voicevox_engine.tts_pipeline.tts_engine import CoreAdapter
-from voicevox_engine.utility import engine_root
+from voicevox_engine.utility.path_utility import engine_root
 
 
 def generate_api_docs_html(schema: str) -> str:
diff --git a/run.py b/run.py
index 4e408de13..15d18783a 100644
--- a/run.py
+++ b/run.py
@@ -31,8 +31,8 @@
 from voicevox_engine.cancellable_engine import CancellableEngine
 from voicevox_engine.core.core_adapter import CoreAdapter
 from voicevox_engine.core.core_initializer import initialize_cores
-from voicevox_engine.engine_manifest import EngineManifestLoader
 from voicevox_engine.engine_manifest.EngineManifest import EngineManifest
+from voicevox_engine.engine_manifest.EngineManifestLoader import EngineManifestLoader
 from voicevox_engine.library_manager import LibraryManager
 from voicevox_engine.metas.Metas import StyleId
 from voicevox_engine.metas.MetasStore import (
@@ -68,13 +68,11 @@
     synthesis_morphing_parameter as _synthesis_morphing_parameter,
 )
 from voicevox_engine.part_of_speech_data import MAX_PRIORITY, MIN_PRIORITY
-from voicevox_engine.preset import Preset, PresetError, PresetManager
-from voicevox_engine.setting import (
-    USER_SETTING_PATH,
-    CorsPolicyMode,
-    Setting,
-    SettingLoader,
-)
+from voicevox_engine.preset.Preset import Preset
+from voicevox_engine.preset.PresetError import PresetError
+from voicevox_engine.preset.PresetManager import PresetManager
+from voicevox_engine.setting.Setting import CorsPolicyMode, Setting
+from voicevox_engine.setting.SettingLoader import USER_SETTING_PATH, SettingLoader
 from voicevox_engine.tts_pipeline.kana_converter import create_kana, parse_kana
 from voicevox_engine.tts_pipeline.tts_engine import (
     TTSEngine,
@@ -88,14 +86,12 @@
     rewrite_word,
     update_dict,
 )
-from voicevox_engine.utility import (
+from voicevox_engine.utility.connect_base64_waves import (
     ConnectBase64WavesException,
     connect_base64_waves,
-    delete_file,
-    engine_root,
-    get_latest_core_version,
-    get_save_dir,
 )
+from voicevox_engine.utility.core_version_utility import get_latest_core_version
+from voicevox_engine.utility.path_utility import delete_file, engine_root, get_save_dir
 from voicevox_engine.utility.run_utility import decide_boolean_from_env
 
 
diff --git a/test/e2e/conftest.py b/test/e2e/conftest.py
index aff8d5977..a2979e3ba 100644
--- a/test/e2e/conftest.py
+++ b/test/e2e/conftest.py
@@ -6,8 +6,8 @@
 from run import generate_app
 
 from voicevox_engine.core.core_initializer import initialize_cores
-from voicevox_engine.preset import PresetManager
-from voicevox_engine.setting import SettingLoader
+from voicevox_engine.preset.PresetManager import PresetManager
+from voicevox_engine.setting.SettingLoader import SettingLoader
 from voicevox_engine.tts_pipeline.tts_engine import make_tts_engines_from_cores
 from voicevox_engine.utility.core_version_utility import get_latest_core_version
 
diff --git a/test/preset/test_preset.py b/test/preset/test_preset.py
index 0922423d2..ffa0d698c 100644
--- a/test/preset/test_preset.py
+++ b/test/preset/test_preset.py
@@ -4,7 +4,9 @@
 from tempfile import TemporaryDirectory
 from unittest import TestCase
 
-from voicevox_engine.preset import Preset, PresetError, PresetManager
+from voicevox_engine.preset.Preset import Preset
+from voicevox_engine.preset.PresetError import PresetError
+from voicevox_engine.preset.PresetManager import PresetManager
 
 presets_test_1_yaml_path = Path("test/preset/presets-test-1.yaml")
 presets_test_2_yaml_path = Path("test/preset/presets-test-2.yaml")
diff --git a/test/setting/test_setting.py b/test/setting/test_setting.py
index 25bb9f856..0a05083ba 100644
--- a/test/setting/test_setting.py
+++ b/test/setting/test_setting.py
@@ -2,7 +2,8 @@
 from tempfile import TemporaryDirectory
 from unittest import TestCase
 
-from voicevox_engine.setting import CorsPolicyMode, Setting, SettingLoader
+from voicevox_engine.setting.Setting import CorsPolicyMode, Setting
+from voicevox_engine.setting.SettingLoader import SettingLoader
 
 
 class TestSettingLoader(TestCase):
diff --git a/test/test_connect_base64_waves.py b/test/test_connect_base64_waves.py
index 88739d227..96cf92d1b 100644
--- a/test/test_connect_base64_waves.py
+++ b/test/test_connect_base64_waves.py
@@ -8,7 +8,10 @@
 from numpy.typing import NDArray
 from soxr import resample
 
-from voicevox_engine.utility import ConnectBase64WavesException, connect_base64_waves
+from voicevox_engine.utility.connect_base64_waves import (
+    ConnectBase64WavesException,
+    connect_base64_waves,
+)
 
 
 def generate_sine_wave_ndarray(
diff --git a/test/test_core_version_utility.py b/test/test_core_version_utility.py
index e96ba8009..7ac191011 100644
--- a/test/test_core_version_utility.py
+++ b/test/test_core_version_utility.py
@@ -1,6 +1,9 @@
 from unittest import TestCase
 
-from voicevox_engine.utility import get_latest_core_version, parse_core_version
+from voicevox_engine.utility.core_version_utility import (
+    get_latest_core_version,
+    parse_core_version,
+)
 
 
 class TestCoreVersion(TestCase):
diff --git a/test/test_mock_tts_engine.py b/test/test_mock_tts_engine.py
index 152ce78de..ef6bea1d5 100644
--- a/test/test_mock_tts_engine.py
+++ b/test/test_mock_tts_engine.py
@@ -1,6 +1,6 @@
 from unittest import TestCase
 
-from voicevox_engine.dev.tts_engine import MockTTSEngine
+from voicevox_engine.dev.tts_engine.mock import MockTTSEngine
 from voicevox_engine.metas.Metas import StyleId
 from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
 from voicevox_engine.tts_pipeline.kana_converter import create_kana
diff --git a/voicevox_engine/cancellable_engine.py b/voicevox_engine/cancellable_engine.py
index c94fd282c..84be0140e 100644
--- a/voicevox_engine/cancellable_engine.py
+++ b/voicevox_engine/cancellable_engine.py
@@ -20,7 +20,7 @@
 from .metas.Metas import StyleId
 from .model import AudioQuery
 from .tts_pipeline.tts_engine import make_tts_engines_from_cores
-from .utility import get_latest_core_version
+from .utility.core_version_utility import get_latest_core_version
 
 
 class CancellableEngine:
diff --git a/voicevox_engine/core/__init__.py b/voicevox_engine/core/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/voicevox_engine/core/core_initializer.py b/voicevox_engine/core/core_initializer.py
index 3a5b1bf9a..66c7ca26c 100644
--- a/voicevox_engine/core/core_initializer.py
+++ b/voicevox_engine/core/core_initializer.py
@@ -4,7 +4,7 @@
 from typing import List, Optional
 
 from ..tts_pipeline.tts_engine import CoreAdapter
-from ..utility import engine_root, get_save_dir
+from ..utility.path_utility import engine_root, get_save_dir
 from .core_wrapper import CoreWrapper, load_runtime_lib
 
 MOCK_VER = "0.0.0"
@@ -129,7 +129,7 @@ def load_core_library(core_dir: Path, suppress_error: bool = False) -> None:
 
     else:
         # モック追加
-        from ..dev.core import MockCoreWrapper
+        from ..dev.core.mock import MockCoreWrapper
 
         if MOCK_VER not in cores:
             print("Info: Loading mock.")
diff --git a/voicevox_engine/dev/__init__.py b/voicevox_engine/dev/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/voicevox_engine/dev/core/__init__.py b/voicevox_engine/dev/core/__init__.py
index f04d393d3..e69de29bb 100644
--- a/voicevox_engine/dev/core/__init__.py
+++ b/voicevox_engine/dev/core/__init__.py
@@ -1,3 +0,0 @@
-from .mock import MockCoreWrapper
-
-__all__ = ["MockCoreWrapper"]
diff --git a/voicevox_engine/dev/tts_engine/__init__.py b/voicevox_engine/dev/tts_engine/__init__.py
index ae0b29ec2..e69de29bb 100644
--- a/voicevox_engine/dev/tts_engine/__init__.py
+++ b/voicevox_engine/dev/tts_engine/__init__.py
@@ -1,3 +0,0 @@
-from .mock import MockTTSEngine
-
-__all__ = ["MockTTSEngine"]
diff --git a/voicevox_engine/engine_manifest/__init__.py b/voicevox_engine/engine_manifest/__init__.py
index 02c293179..e69de29bb 100644
--- a/voicevox_engine/engine_manifest/__init__.py
+++ b/voicevox_engine/engine_manifest/__init__.py
@@ -1,7 +0,0 @@
-from .EngineManifest import EngineManifest
-from .EngineManifestLoader import EngineManifestLoader
-
-__all__ = [
-    "EngineManifest",
-    "EngineManifestLoader",
-]
diff --git a/voicevox_engine/metas/__init__.py b/voicevox_engine/metas/__init__.py
index 4907fdf38..e69de29bb 100644
--- a/voicevox_engine/metas/__init__.py
+++ b/voicevox_engine/metas/__init__.py
@@ -1,6 +0,0 @@
-from . import Metas, MetasStore
-
-__all__ = [
-    "Metas",
-    "MetasStore",
-]
diff --git a/voicevox_engine/preset/__init__.py b/voicevox_engine/preset/__init__.py
index 8c485e2fb..e69de29bb 100644
--- a/voicevox_engine/preset/__init__.py
+++ b/voicevox_engine/preset/__init__.py
@@ -1,9 +0,0 @@
-from .Preset import Preset
-from .PresetError import PresetError
-from .PresetManager import PresetManager
-
-__all__ = [
-    "Preset",
-    "PresetManager",
-    "PresetError",
-]
diff --git a/voicevox_engine/setting/SettingLoader.py b/voicevox_engine/setting/SettingLoader.py
index 2a22e025b..0e7e5ca90 100644
--- a/voicevox_engine/setting/SettingLoader.py
+++ b/voicevox_engine/setting/SettingLoader.py
@@ -2,7 +2,7 @@
 
 import yaml
 
-from ..utility import get_save_dir
+from ..utility.path_utility import get_save_dir
 from .Setting import Setting
 
 USER_SETTING_PATH: Path = get_save_dir() / "setting.yml"
diff --git a/voicevox_engine/setting/__init__.py b/voicevox_engine/setting/__init__.py
index ff399f92b..e69de29bb 100644
--- a/voicevox_engine/setting/__init__.py
+++ b/voicevox_engine/setting/__init__.py
@@ -1,9 +0,0 @@
-from .Setting import CorsPolicyMode, Setting
-from .SettingLoader import USER_SETTING_PATH, SettingLoader
-
-__all__ = [
-    "USER_SETTING_PATH",
-    "CorsPolicyMode",
-    "Setting",
-    "SettingLoader",
-]
diff --git a/voicevox_engine/tts_pipeline/__init__.py b/voicevox_engine/tts_pipeline/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index d239d5c3e..7abb61e31 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -572,7 +572,7 @@ def make_tts_engines_from_cores(cores: dict[str, CoreAdapter]) -> dict[str, TTSE
     tts_engines: dict[str, TTSEngine] = {}
     for ver, core in cores.items():
         if ver == MOCK_VER:
-            from ..dev.tts_engine import MockTTSEngine
+            from ..dev.tts_engine.mock import MockTTSEngine
 
             tts_engines[ver] = MockTTSEngine()
         else:
diff --git a/voicevox_engine/user_dict.py b/voicevox_engine/user_dict.py
index 8661321f7..ed05c7615 100644
--- a/voicevox_engine/user_dict.py
+++ b/voicevox_engine/user_dict.py
@@ -12,7 +12,8 @@
 
 from .model import UserDictWord, WordTypes
 from .part_of_speech_data import MAX_PRIORITY, MIN_PRIORITY, part_of_speech_data
-from .utility import engine_root, get_save_dir, mutex_wrapper
+from .utility.mutex_utility import mutex_wrapper
+from .utility.path_utility import engine_root, get_save_dir
 
 root_dir = engine_root()
 save_dir = get_save_dir()
diff --git a/voicevox_engine/utility/__init__.py b/voicevox_engine/utility/__init__.py
index d40fea3e6..e69de29bb 100644
--- a/voicevox_engine/utility/__init__.py
+++ b/voicevox_engine/utility/__init__.py
@@ -1,20 +0,0 @@
-from .connect_base64_waves import (
-    ConnectBase64WavesException,
-    connect_base64_waves,
-    decode_base64_waves,
-)
-from .core_version_utility import get_latest_core_version, parse_core_version
-from .mutex_utility import mutex_wrapper
-from .path_utility import delete_file, engine_root, get_save_dir
-
-__all__ = [
-    "ConnectBase64WavesException",
-    "connect_base64_waves",
-    "decode_base64_waves",
-    "get_latest_core_version",
-    "parse_core_version",
-    "delete_file",
-    "engine_root",
-    "get_save_dir",
-    "mutex_wrapper",
-]

From 72da867f6d7fb3f52bbe1999c652fb7f597975b8 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sun, 4 Feb 2024 09:40:28 +0900
Subject: [PATCH 162/177] =?UTF-8?q?=E8=BF=BD=E5=8A=A0:=20`TTSEngine`=20?=
 =?UTF-8?q?=E3=82=B9=E3=83=8A=E3=83=83=E3=83=97=E3=82=B7=E3=83=A7=E3=83=83?=
 =?UTF-8?q?=E3=83=88=20(#1013)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add: TTSEngine スナップショット

* fix: ndarrayダンプ

* fix: スナップショット数値精度
---
 ...reate_accent_phrases_from_kana_output.json |   95 +
 ...t_mocked_create_accent_phrases_output.json |   95 +
 .../test_mocked_synthesize_wave_output.json   | 7172 +++++++++++++++++
 ...mocked_update_length_and_pitch_output.json |   95 +
 .../test_mocked_update_pitch_output.json      |   95 +
 test/tts_pipeline/test_tts_engine.py          |   87 +-
 6 files changed, 7638 insertions(+), 1 deletion(-)
 create mode 100644 test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_create_accent_phrases_from_kana_output.json
 create mode 100644 test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_create_accent_phrases_output.json
 create mode 100644 test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_synthesize_wave_output.json
 create mode 100644 test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_update_length_and_pitch_output.json
 create mode 100644 test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_update_pitch_output.json

diff --git a/test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_create_accent_phrases_from_kana_output.json b/test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_create_accent_phrases_from_kana_output.json
new file mode 100644
index 000000000..671e117de
--- /dev/null
+++ b/test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_create_accent_phrases_from_kana_output.json
@@ -0,0 +1,95 @@
+[
+  {
+    "accent": 5,
+    "is_interrogative": false,
+    "moras": [
+      {
+        "consonant": "k",
+        "consonant_length": 2.44,
+        "pitch": 4.38,
+        "text": "コ",
+        "vowel": "o",
+        "vowel_length": 2.88
+      },
+      {
+        "consonant": null,
+        "consonant_length": null,
+        "pitch": 1.25,
+        "text": "ン",
+        "vowel": "N",
+        "vowel_length": 1.25
+      },
+      {
+        "consonant": "n",
+        "consonant_length": 2.75,
+        "pitch": 4.06,
+        "text": "ニ",
+        "vowel": "i",
+        "vowel_length": 2.31
+      },
+      {
+        "consonant": "ch",
+        "consonant_length": 1.62,
+        "pitch": 2.94,
+        "text": "チ",
+        "vowel": "i",
+        "vowel_length": 2.31
+      },
+      {
+        "consonant": "w",
+        "consonant_length": 3.62,
+        "pitch": 4.19,
+        "text": "ワ",
+        "vowel": "a",
+        "vowel_length": 1.44
+      }
+    ],
+    "pause_mora": {
+      "consonant": null,
+      "consonant_length": null,
+      "pitch": 0.0,
+      "text": "、",
+      "vowel": "pau",
+      "vowel_length": 1.0
+    }
+  },
+  {
+    "accent": 1,
+    "is_interrogative": false,
+    "moras": [
+      {
+        "consonant": "h",
+        "consonant_length": 2.19,
+        "pitch": 3.69,
+        "text": "ヒ",
+        "vowel": "i",
+        "vowel_length": 2.31
+      },
+      {
+        "consonant": "h",
+        "consonant_length": 2.19,
+        "pitch": 4.06,
+        "text": "ホ",
+        "vowel": "o",
+        "vowel_length": 2.88
+      },
+      {
+        "consonant": "d",
+        "consonant_length": 1.75,
+        "pitch": 2.62,
+        "text": "デ",
+        "vowel": "e",
+        "vowel_length": 1.88
+      },
+      {
+        "consonant": "s",
+        "consonant_length": 3.19,
+        "pitch": 0.0,
+        "text": "ス",
+        "vowel": "U",
+        "vowel_length": 1.38
+      }
+    ],
+    "pause_mora": null
+  }
+]
diff --git a/test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_create_accent_phrases_output.json b/test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_create_accent_phrases_output.json
new file mode 100644
index 000000000..671e117de
--- /dev/null
+++ b/test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_create_accent_phrases_output.json
@@ -0,0 +1,95 @@
+[
+  {
+    "accent": 5,
+    "is_interrogative": false,
+    "moras": [
+      {
+        "consonant": "k",
+        "consonant_length": 2.44,
+        "pitch": 4.38,
+        "text": "コ",
+        "vowel": "o",
+        "vowel_length": 2.88
+      },
+      {
+        "consonant": null,
+        "consonant_length": null,
+        "pitch": 1.25,
+        "text": "ン",
+        "vowel": "N",
+        "vowel_length": 1.25
+      },
+      {
+        "consonant": "n",
+        "consonant_length": 2.75,
+        "pitch": 4.06,
+        "text": "ニ",
+        "vowel": "i",
+        "vowel_length": 2.31
+      },
+      {
+        "consonant": "ch",
+        "consonant_length": 1.62,
+        "pitch": 2.94,
+        "text": "チ",
+        "vowel": "i",
+        "vowel_length": 2.31
+      },
+      {
+        "consonant": "w",
+        "consonant_length": 3.62,
+        "pitch": 4.19,
+        "text": "ワ",
+        "vowel": "a",
+        "vowel_length": 1.44
+      }
+    ],
+    "pause_mora": {
+      "consonant": null,
+      "consonant_length": null,
+      "pitch": 0.0,
+      "text": "、",
+      "vowel": "pau",
+      "vowel_length": 1.0
+    }
+  },
+  {
+    "accent": 1,
+    "is_interrogative": false,
+    "moras": [
+      {
+        "consonant": "h",
+        "consonant_length": 2.19,
+        "pitch": 3.69,
+        "text": "ヒ",
+        "vowel": "i",
+        "vowel_length": 2.31
+      },
+      {
+        "consonant": "h",
+        "consonant_length": 2.19,
+        "pitch": 4.06,
+        "text": "ホ",
+        "vowel": "o",
+        "vowel_length": 2.88
+      },
+      {
+        "consonant": "d",
+        "consonant_length": 1.75,
+        "pitch": 2.62,
+        "text": "デ",
+        "vowel": "e",
+        "vowel_length": 1.88
+      },
+      {
+        "consonant": "s",
+        "consonant_length": 3.19,
+        "pitch": 0.0,
+        "text": "ス",
+        "vowel": "U",
+        "vowel_length": 1.38
+      }
+    ],
+    "pause_mora": null
+  }
+]
diff --git a/test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_synthesize_wave_output.json b/test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_synthesize_wave_output.json
new file mode 100644
index 000000000..22caa02ba
--- /dev/null
+++ b/test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_synthesize_wave_output.json
@@ -0,0 +1,7172 @@
+[
+  [
+    [
+      0.96,
+      0.96
+    ],
+    [
+      1.4,
+      1.4
+    ],
+    [
+      1.24,
+      1.24
+    ],
+    [
+      1.34,
+      1.34
+    ],
+    [
+      1.27,
+      1.27
+    ],
+    [
+      1.33,
+      1.33
+    ],
+    [
+      1.28,
+      1.28
+    ],
+    [
+      1.32,
+      1.32
+    ],
+    [
+      1.28,
+      1.28
+    ],
+    [
+      1.31,
+      1.31
+    ],
+    [
+      1.29,
+      1.29
+    ],
+    [
+      1.31,
+      1.31
+    ],
+    [
+      1.29,
+      1.29
+    ],
+    [
+      1.31,
+      1.31
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.31,
+      1.31
+    ],
+    [
+      1.29,
+      1.29
+    ],
+    [
+      1.31,
+      1.31
+    ],
+    [
+      1.29,
+      1.29
+    ],
+    [
+      1.31,
+      1.31
+    ],
+    [
+      1.29,
+      1.29
+    ],
+    [
+      1.31,
+      1.31
+    ],
+    [
+      1.29,
+      1.29
+    ],
+    [
+      1.31,
+      1.31
+    ],
+    [
+      1.29,
+      1.29
+    ],
+    [
+      1.31,
+      1.31
+    ],
+    [
+      1.29,
+      1.29
+    ],
+    [
+      1.31,
+      1.31
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.3,
+      1.3
+    ],
+    [
+      1.29,
+      1.29
+    ],
+    [
+      1.32,
+      1.32
+    ],
+    [
+      1.27,
+      1.27
+    ],
+    [
+      1.37,
+      1.37
+    ]
+  ]
+]
diff --git a/test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_update_length_and_pitch_output.json b/test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_update_length_and_pitch_output.json
new file mode 100644
index 000000000..671e117de
--- /dev/null
+++ b/test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_update_length_and_pitch_output.json
@@ -0,0 +1,95 @@
+[
+  {
+    "accent": 5,
+    "is_interrogative": false,
+    "moras": [
+      {
+        "consonant": "k",
+        "consonant_length": 2.44,
+        "pitch": 4.38,
+        "text": "コ",
+        "vowel": "o",
+        "vowel_length": 2.88
+      },
+      {
+        "consonant": null,
+        "consonant_length": null,
+        "pitch": 1.25,
+        "text": "ン",
+        "vowel": "N",
+        "vowel_length": 1.25
+      },
+      {
+        "consonant": "n",
+        "consonant_length": 2.75,
+        "pitch": 4.06,
+        "text": "ニ",
+        "vowel": "i",
+        "vowel_length": 2.31
+      },
+      {
+        "consonant": "ch",
+        "consonant_length": 1.62,
+        "pitch": 2.94,
+        "text": "チ",
+        "vowel": "i",
+        "vowel_length": 2.31
+      },
+      {
+        "consonant": "w",
+        "consonant_length": 3.62,
+        "pitch": 4.19,
+        "text": "ワ",
+        "vowel": "a",
+        "vowel_length": 1.44
+      }
+    ],
+    "pause_mora": {
+      "consonant": null,
+      "consonant_length": null,
+      "pitch": 0.0,
+      "text": "、",
+      "vowel": "pau",
+      "vowel_length": 1.0
+    }
+  },
+  {
+    "accent": 1,
+    "is_interrogative": false,
+    "moras": [
+      {
+        "consonant": "h",
+        "consonant_length": 2.19,
+        "pitch": 3.69,
+        "text": "ヒ",
+        "vowel": "i",
+        "vowel_length": 2.31
+      },
+      {
+        "consonant": "h",
+        "consonant_length": 2.19,
+        "pitch": 4.06,
+        "text": "ホ",
+        "vowel": "o",
+        "vowel_length": 2.88
+      },
+      {
+        "consonant": "d",
+        "consonant_length": 1.75,
+        "pitch": 2.62,
+        "text": "デ",
+        "vowel": "e",
+        "vowel_length": 1.88
+      },
+      {
+        "consonant": "s",
+        "consonant_length": 3.19,
+        "pitch": 0.0,
+        "text": "ス",
+        "vowel": "U",
+        "vowel_length": 1.38
+      }
+    ],
+    "pause_mora": null
+  }
+]
diff --git a/test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_update_pitch_output.json b/test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_update_pitch_output.json
new file mode 100644
index 000000000..c1b0b844e
--- /dev/null
+++ b/test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_update_pitch_output.json
@@ -0,0 +1,95 @@
+[
+  {
+    "accent": 5,
+    "is_interrogative": false,
+    "moras": [
+      {
+        "consonant": "k",
+        "consonant_length": 0.0,
+        "pitch": 4.38,
+        "text": "コ",
+        "vowel": "o",
+        "vowel_length": 0.0
+      },
+      {
+        "consonant": null,
+        "consonant_length": null,
+        "pitch": 1.25,
+        "text": "ン",
+        "vowel": "N",
+        "vowel_length": 0.0
+      },
+      {
+        "consonant": "n",
+        "consonant_length": 0.0,
+        "pitch": 4.06,
+        "text": "ニ",
+        "vowel": "i",
+        "vowel_length": 0.0
+      },
+      {
+        "consonant": "ch",
+        "consonant_length": 0.0,
+        "pitch": 2.94,
+        "text": "チ",
+        "vowel": "i",
+        "vowel_length": 0.0
+      },
+      {
+        "consonant": "w",
+        "consonant_length": 0.0,
+        "pitch": 4.19,
+        "text": "ワ",
+        "vowel": "a",
+        "vowel_length": 0.0
+      }
+    ],
+    "pause_mora": {
+      "consonant": null,
+      "consonant_length": null,
+      "pitch": 0.0,
+      "text": "、",
+      "vowel": "pau",
+      "vowel_length": 0.0
+    }
+  },
+  {
+    "accent": 1,
+    "is_interrogative": false,
+    "moras": [
+      {
+        "consonant": "h",
+        "consonant_length": 0.0,
+        "pitch": 3.69,
+        "text": "ヒ",
+        "vowel": "i",
+        "vowel_length": 0.0
+      },
+      {
+        "consonant": "h",
+        "consonant_length": 0.0,
+        "pitch": 4.06,
+        "text": "ホ",
+        "vowel": "o",
+        "vowel_length": 0.0
+      },
+      {
+        "consonant": "d",
+        "consonant_length": 0.0,
+        "pitch": 2.62,
+        "text": "デ",
+        "vowel": "e",
+        "vowel_length": 0.0
+      },
+      {
+        "consonant": "s",
+        "consonant_length": 0.0,
+        "pitch": 0.0,
+        "text": "ス",
+        "vowel": "U",
+        "vowel_length": 0.0
+      }
+    ],
+    "pause_mora": null
+  }
+]
diff --git a/test/tts_pipeline/test_tts_engine.py b/test/tts_pipeline/test_tts_engine.py
index b29fa0f65..1037a5bc5 100644
--- a/test/tts_pipeline/test_tts_engine.py
+++ b/test/tts_pipeline/test_tts_engine.py
@@ -9,7 +9,7 @@
 
 from voicevox_engine.dev.core.mock import MockCoreWrapper
 from voicevox_engine.metas.Metas import StyleId
-from voicevox_engine.model import AccentPhrase, Mora
+from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
 from voicevox_engine.tts_pipeline.acoustic_feature_extractor import (
     UNVOICED_MORA_TAIL_PHONEMES,
     Phoneme,
@@ -137,6 +137,14 @@ def test_to_flatten_phonemes():
     assert true_phonemes == phonemes
 
 
+def _gen_hello_hiho_text() -> str:
+    return "こんにちは、ヒホです"
+
+
+def _gen_hello_hiho_kana() -> str:
+    return "コンニチワ'、ヒ'ホデ_ス"
+
+
 def _gen_hello_hiho_accent_phrases() -> list[AccentPhrase]:
     return [
         AccentPhrase(
@@ -163,6 +171,21 @@ def _gen_hello_hiho_accent_phrases() -> list[AccentPhrase]:
     ]
 
 
+def _gen_hello_hiho_query() -> AudioQuery:
+    return AudioQuery(
+        accent_phrases=_gen_hello_hiho_accent_phrases(),
+        speedScale=2.0,
+        pitchScale=1.1,
+        intonationScale=0.9,
+        volumeScale=1.3,
+        prePhonemeLength=0.1,
+        postPhonemeLength=0.2,
+        outputSamplingRate=12000,
+        outputStereo=True,
+        kana=_gen_hello_hiho_kana(),
+    )
+
+
 class TestTTSEngine(TestCase):
     def setUp(self):
         super().setUp()
@@ -323,6 +346,7 @@ def test_create_accent_phrases_toward_unknown():
 
 
 def test_mocked_update_length_output(snapshot_json: JSONSnapshotExtension) -> None:
+    """モックされた `TTSEngine.update_length()` の出力スナップショットが一定である"""
     # Inputs
     tts_engine = TTSEngine(MockCoreWrapper())
     hello_hiho = _gen_hello_hiho_accent_phrases()
@@ -332,6 +356,67 @@ def test_mocked_update_length_output(snapshot_json: JSONSnapshotExtension) -> No
     assert snapshot_json == round_floats(pydantic_to_native_type(result), round_value=2)
 
 
+def test_mocked_update_pitch_output(snapshot_json: JSONSnapshotExtension) -> None:
+    """モックされた `TTSEngine.update_pitch()` の出力スナップショットが一定である"""
+    # Inputs
+    tts_engine = TTSEngine(MockCoreWrapper())
+    hello_hiho = _gen_hello_hiho_accent_phrases()
+    # Outputs
+    result = tts_engine.update_pitch(hello_hiho, StyleId(1))
+    # Tests
+    assert snapshot_json == round_floats(pydantic_to_native_type(result), round_value=2)
+
+
+def test_mocked_update_length_and_pitch_output(
+    snapshot_json: JSONSnapshotExtension,
+) -> None:
+    """モックされた `TTSEngine.update_length_and_pitch()` の出力スナップショットが一定である"""
+    # Inputs
+    tts_engine = TTSEngine(MockCoreWrapper())
+    hello_hiho = _gen_hello_hiho_accent_phrases()
+    # Outputs
+    result = tts_engine.update_length_and_pitch(hello_hiho, StyleId(1))
+    # Tests
+    assert snapshot_json == round_floats(pydantic_to_native_type(result), round_value=2)
+
+
+def test_mocked_create_accent_phrases_output(
+    snapshot_json: JSONSnapshotExtension,
+) -> None:
+    """モックされた `TTSEngine.create_accent_phrases()` の出力スナップショットが一定である"""
+    # Inputs
+    tts_engine = TTSEngine(MockCoreWrapper())
+    hello_hiho = _gen_hello_hiho_text()
+    # Outputs
+    result = tts_engine.create_accent_phrases(hello_hiho, StyleId(1))
+    # Tests
+    assert snapshot_json == round_floats(pydantic_to_native_type(result), round_value=2)
+
+
+def test_mocked_create_accent_phrases_from_kana_output(
+    snapshot_json: JSONSnapshotExtension,
+) -> None:
+    """モックされた `TTSEngine.create_accent_phrases_from_kana()` の出力スナップショットが一定である"""
+    # Inputs
+    tts_engine = TTSEngine(MockCoreWrapper())
+    hello_hiho = _gen_hello_hiho_kana()
+    # Outputs
+    result = tts_engine.create_accent_phrases_from_kana(hello_hiho, StyleId(1))
+    # Tests
+    assert snapshot_json == round_floats(pydantic_to_native_type(result), round_value=2)
+
+
+def test_mocked_synthesize_wave_output(snapshot_json: JSONSnapshotExtension) -> None:
+    """モックされた `TTSEngine.synthesize_wave()` の出力スナップショットが一定である"""
+    # Inputs
+    tts_engine = TTSEngine(MockCoreWrapper())
+    hello_hiho = _gen_hello_hiho_query()
+    # Outputs
+    result = tts_engine.synthesize_wave(hello_hiho, StyleId(1))
+    # Tests
+    assert snapshot_json == round_floats(result.tolist(), round_value=2)
+
+
 def koreha_arimasuka_base_expected():
     return [
         AccentPhrase(

From d5b1bee2ddd26fbb67352af9ce023327f2b2450e Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sun, 4 Feb 2024 09:43:43 +0900
Subject: [PATCH 163/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`frame=5Fquery=5F?=
 =?UTF-8?q?to=5Fdecoder=5Ffeature()`=20=E5=88=87=E3=82=8A=E5=87=BA?=
 =?UTF-8?q?=E3=81=97=20(#1041)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: `frame_query_to_decoder_feature()` 切り出し

* fix: 関数名
---
 voicevox_engine/tts_pipeline/tts_engine.py | 59 ++++++++++++----------
 1 file changed, 33 insertions(+), 26 deletions(-)

diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index 7abb61e31..c0c553516 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -282,6 +282,35 @@ def calc_phoneme_lengths(
     return phoneme_durations_array
 
 
+def frame_query_to_sf_decoder_feature(
+    query: FrameAudioQuery,
+) -> tuple[NDArray[np.int64], NDArray[np.float32], NDArray[np.float32]]:
+    """歌声合成用のクエリからフレームごとの音素・音高・音量を得る"""
+
+    # 各データを分解・numpy配列に変換する
+    phonemes = []
+    phoneme_lengths = []
+
+    for phoneme in query.phonemes:
+        if phoneme.phoneme not in Phoneme._PHONEME_LIST:
+            raise HTTPException(
+                status_code=400,
+                detail=f"phoneme {phoneme.phoneme} is not valid",
+            )
+
+        phonemes.append(Phoneme(phoneme.phoneme).id)
+        phoneme_lengths.append(phoneme.frame_length)
+
+    phonemes_array = np.array(phonemes, dtype=np.int64)
+    phoneme_lengths_array = np.array(phoneme_lengths, dtype=np.int64)
+
+    frame_phonemes = np.repeat(phonemes_array, phoneme_lengths_array)
+    f0s = np.array(query.f0, dtype=np.float32)
+    volumes = np.array(query.volume, dtype=np.float32)
+
+    return frame_phonemes, f0s, volumes
+
+
 class TTSEngine:
     """音声合成器（core）の管理/実行/プロキシと音声合成フロー"""
 
@@ -530,38 +559,16 @@ def create_sing_phoneme_and_f0_and_volume(
 
     def frame_synthsize_wave(
         self,
-        frame_audio_query: FrameAudioQuery,
+        query: FrameAudioQuery,
         style_id: StyleId,
     ) -> NDArray[np.float32]:
         """歌声合成用のクエリ・スタイルIDに基づいて音声波形を生成する"""
 
-        # 各データを分解・numpy配列に変換する
-        phonemes = []
-        phoneme_lengths = []
-
-        for phoneme in frame_audio_query.phonemes:
-            if phoneme.phoneme not in Phoneme._PHONEME_LIST:
-                raise HTTPException(
-                    status_code=400,
-                    detail=f"phoneme {phoneme.phoneme} is not valid",
-                )
-
-            phonemes.append(Phoneme(phoneme.phoneme).id)
-            phoneme_lengths.append(phoneme.frame_length)
-
-        phonemes_array = np.array(phonemes, dtype=np.int64)
-        phoneme_lengths_array = np.array(phoneme_lengths, dtype=np.int64)
-
-        frame_phonemes = np.repeat(phonemes_array, phoneme_lengths_array)
-        f0s = np.array(frame_audio_query.f0, dtype=np.float32)
-        volumes = np.array(frame_audio_query.volume, dtype=np.float32)
-
-        # コアを用いて音声を生成する
+        phoneme, f0, volume = frame_query_to_sf_decoder_feature(query)
         raw_wave, sr_raw_wave = self._core.safe_sf_decode_forward(
-            frame_phonemes, f0s, volumes, style_id
+            phoneme, f0, volume, style_id
         )
-
-        wave = raw_wave_to_output_wave(frame_audio_query, raw_wave, sr_raw_wave)
+        wave = raw_wave_to_output_wave(query, raw_wave, sr_raw_wave)
         return wave
 
 

From 843bd7626a9999485ea6972fe0a66f406b02a868 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sun, 4 Feb 2024 09:45:27 +0900
Subject: [PATCH 164/177] =?UTF-8?q?=E8=BF=BD=E5=8A=A0:=20mypy=201.8=20(#10?=
 =?UTF-8?q?42)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add: mypy1.8

* fix: requirements-test.txt
---
 poetry.lock           | 58 ++++++++++++++++++++++---------------------
 pyproject.toml        |  2 +-
 requirements-test.txt |  2 +-
 3 files changed, 32 insertions(+), 30 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index f32b58cd3..daa98c671 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1186,38 +1186,38 @@ files = [
 
 [[package]]
 name = "mypy"
-version = "1.6.0"
+version = "1.8.0"
 description = "Optional static typing for Python"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "mypy-1.6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:091f53ff88cb093dcc33c29eee522c087a438df65eb92acd371161c1f4380ff0"},
-    {file = "mypy-1.6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:eb7ff4007865833c470a601498ba30462b7374342580e2346bf7884557e40531"},
-    {file = "mypy-1.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49499cf1e464f533fc45be54d20a6351a312f96ae7892d8e9f1708140e27ce41"},
-    {file = "mypy-1.6.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4c192445899c69f07874dabda7e931b0cc811ea055bf82c1ababf358b9b2a72c"},
-    {file = "mypy-1.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:3df87094028e52766b0a59a3e46481bb98b27986ed6ded6a6cc35ecc75bb9182"},
-    {file = "mypy-1.6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3c8835a07b8442da900db47ccfda76c92c69c3a575872a5b764332c4bacb5a0a"},
-    {file = "mypy-1.6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:24f3de8b9e7021cd794ad9dfbf2e9fe3f069ff5e28cb57af6f873ffec1cb0425"},
-    {file = "mypy-1.6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:856bad61ebc7d21dbc019b719e98303dc6256cec6dcc9ebb0b214b81d6901bd8"},
-    {file = "mypy-1.6.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:89513ddfda06b5c8ebd64f026d20a61ef264e89125dc82633f3c34eeb50e7d60"},
-    {file = "mypy-1.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:9f8464ed410ada641c29f5de3e6716cbdd4f460b31cf755b2af52f2d5ea79ead"},
-    {file = "mypy-1.6.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:971104bcb180e4fed0d7bd85504c9036346ab44b7416c75dd93b5c8c6bb7e28f"},
-    {file = "mypy-1.6.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ab98b8f6fdf669711f3abe83a745f67f50e3cbaea3998b90e8608d2b459fd566"},
-    {file = "mypy-1.6.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a69db3018b87b3e6e9dd28970f983ea6c933800c9edf8c503c3135b3274d5ad"},
-    {file = "mypy-1.6.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:dccd850a2e3863891871c9e16c54c742dba5470f5120ffed8152956e9e0a5e13"},
-    {file = "mypy-1.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:f8598307150b5722854f035d2e70a1ad9cc3c72d392c34fffd8c66d888c90f17"},
-    {file = "mypy-1.6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:fea451a3125bf0bfe716e5d7ad4b92033c471e4b5b3e154c67525539d14dc15a"},
-    {file = "mypy-1.6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e28d7b221898c401494f3b77db3bac78a03ad0a0fff29a950317d87885c655d2"},
-    {file = "mypy-1.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4b7a99275a61aa22256bab5839c35fe8a6887781862471df82afb4b445daae6"},
-    {file = "mypy-1.6.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:7469545380dddce5719e3656b80bdfbb217cfe8dbb1438532d6abc754b828fed"},
-    {file = "mypy-1.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:7807a2a61e636af9ca247ba8494031fb060a0a744b9fee7de3a54bed8a753323"},
-    {file = "mypy-1.6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d2dad072e01764823d4b2f06bc7365bb1d4b6c2f38c4d42fade3c8d45b0b4b67"},
-    {file = "mypy-1.6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b19006055dde8a5425baa5f3b57a19fa79df621606540493e5e893500148c72f"},
-    {file = "mypy-1.6.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:31eba8a7a71f0071f55227a8057468b8d2eb5bf578c8502c7f01abaec8141b2f"},
-    {file = "mypy-1.6.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8e0db37ac4ebb2fee7702767dfc1b773c7365731c22787cb99f507285014fcaf"},
-    {file = "mypy-1.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:c69051274762cccd13498b568ed2430f8d22baa4b179911ad0c1577d336ed849"},
-    {file = "mypy-1.6.0-py3-none-any.whl", hash = "sha256:9e1589ca150a51d9d00bb839bfeca2f7a04f32cd62fad87a847bc0818e15d7dc"},
-    {file = "mypy-1.6.0.tar.gz", hash = "sha256:4f3d27537abde1be6d5f2c96c29a454da333a2a271ae7d5bc7110e6d4b7beb3f"},
+    {file = "mypy-1.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:485a8942f671120f76afffff70f259e1cd0f0cfe08f81c05d8816d958d4577d3"},
+    {file = "mypy-1.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:df9824ac11deaf007443e7ed2a4a26bebff98d2bc43c6da21b2b64185da011c4"},
+    {file = "mypy-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2afecd6354bbfb6e0160f4e4ad9ba6e4e003b767dd80d85516e71f2e955ab50d"},
+    {file = "mypy-1.8.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8963b83d53ee733a6e4196954502b33567ad07dfd74851f32be18eb932fb1cb9"},
+    {file = "mypy-1.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:e46f44b54ebddbeedbd3d5b289a893219065ef805d95094d16a0af6630f5d410"},
+    {file = "mypy-1.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:855fe27b80375e5c5878492f0729540db47b186509c98dae341254c8f45f42ae"},
+    {file = "mypy-1.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4c886c6cce2d070bd7df4ec4a05a13ee20c0aa60cb587e8d1265b6c03cf91da3"},
+    {file = "mypy-1.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d19c413b3c07cbecf1f991e2221746b0d2a9410b59cb3f4fb9557f0365a1a817"},
+    {file = "mypy-1.8.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9261ed810972061388918c83c3f5cd46079d875026ba97380f3e3978a72f503d"},
+    {file = "mypy-1.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:51720c776d148bad2372ca21ca29256ed483aa9a4cdefefcef49006dff2a6835"},
+    {file = "mypy-1.8.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:52825b01f5c4c1c4eb0db253ec09c7aa17e1a7304d247c48b6f3599ef40db8bd"},
+    {file = "mypy-1.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f5ac9a4eeb1ec0f1ccdc6f326bcdb464de5f80eb07fb38b5ddd7b0de6bc61e55"},
+    {file = "mypy-1.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afe3fe972c645b4632c563d3f3eff1cdca2fa058f730df2b93a35e3b0c538218"},
+    {file = "mypy-1.8.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:42c6680d256ab35637ef88891c6bd02514ccb7e1122133ac96055ff458f93fc3"},
+    {file = "mypy-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:720a5ca70e136b675af3af63db533c1c8c9181314d207568bbe79051f122669e"},
+    {file = "mypy-1.8.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:028cf9f2cae89e202d7b6593cd98db6759379f17a319b5faf4f9978d7084cdc6"},
+    {file = "mypy-1.8.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4e6d97288757e1ddba10dd9549ac27982e3e74a49d8d0179fc14d4365c7add66"},
+    {file = "mypy-1.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f1478736fcebb90f97e40aff11a5f253af890c845ee0c850fe80aa060a267c6"},
+    {file = "mypy-1.8.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:42419861b43e6962a649068a61f4a4839205a3ef525b858377a960b9e2de6e0d"},
+    {file = "mypy-1.8.0-cp38-cp38-win_amd64.whl", hash = "sha256:2b5b6c721bd4aabaadead3a5e6fa85c11c6c795e0c81a7215776ef8afc66de02"},
+    {file = "mypy-1.8.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5c1538c38584029352878a0466f03a8ee7547d7bd9f641f57a0f3017a7c905b8"},
+    {file = "mypy-1.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4ef4be7baf08a203170f29e89d79064463b7fc7a0908b9d0d5114e8009c3a259"},
+    {file = "mypy-1.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7178def594014aa6c35a8ff411cf37d682f428b3b5617ca79029d8ae72f5402b"},
+    {file = "mypy-1.8.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ab3c84fa13c04aeeeabb2a7f67a25ef5d77ac9d6486ff33ded762ef353aa5592"},
+    {file = "mypy-1.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:99b00bc72855812a60d253420d8a2eae839b0afa4938f09f4d2aa9bb4654263a"},
+    {file = "mypy-1.8.0-py3-none-any.whl", hash = "sha256:538fd81bb5e430cc1381a443971c0475582ff9f434c16cd46d2c66763ce85d9d"},
+    {file = "mypy-1.8.0.tar.gz", hash = "sha256:6ff8b244d7085a0b425b56d327b480c3b29cafbd2eff27316a004f9a7391ae07"},
 ]
 
 [package.dependencies]
@@ -1227,6 +1227,7 @@ typing-extensions = ">=4.1.0"
 [package.extras]
 dmypy = ["psutil (>=4.0)"]
 install-types = ["pip"]
+mypyc = ["setuptools (>=50)"]
 reports = ["lxml"]
 
 [[package]]
@@ -1832,6 +1833,7 @@ files = [
     {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
     {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
     {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
+    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
     {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
     {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
     {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
diff --git a/pyproject.toml b/pyproject.toml
index 22f0b66df..9d0db2df3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -68,7 +68,7 @@ black = "^22.12.0"
 flake8-bugbear = "^23.1.0"
 flake8 = "^6.0.0"
 isort = "^5.12.0"
-mypy = "^1.6.0"
+mypy = "^1.8.0"
 pytest = "^7.4.3"
 coveralls = "^3.2.0"
 poetry = "^1.3.1"
diff --git a/requirements-test.txt b/requirements-test.txt
index 46d3f71c5..ea3290c26 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -44,7 +44,7 @@ mccabe==0.7.0 ; python_version >= "3.11" and python_version < "3.12"
 more-itertools==10.1.0 ; python_version >= "3.11" and python_version < "3.12"
 msgpack==1.0.5 ; python_version >= "3.11" and python_version < "3.12"
 mypy-extensions==1.0.0 ; python_version >= "3.11" and python_version < "3.12"
-mypy==1.6.0 ; python_version >= "3.11" and python_version < "3.12"
+mypy==1.8.0 ; python_version >= "3.11" and python_version < "3.12"
 numpy==1.26.2 ; python_version >= "3.11" and python_version < "3.12"
 packaging==23.1 ; python_version >= "3.11" and python_version < "3.12"
 pathspec==0.11.2 ; python_version >= "3.11" and python_version < "3.12"

From 1f35237fe30a9c79716bdfc92b49d624d1abb395 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Mon, 5 Feb 2024 17:17:55 +0900
Subject: [PATCH 165/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20lint=20(#1046)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix: lint

* fix: lint
---
 .github/workflows/build-docker.yml               |  2 +-
 .github/workflows/build.yml                      |  6 +++---
 .github/workflows/release-test-docker.yml        | 16 ++++++++--------
 .github/workflows/release-test.yml               |  6 +++---
 .../create_venv_and_generate_licenses.bash       |  2 +-
 build_util/process_voicevox_resource.bash        | 10 +++++-----
 6 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/.github/workflows/build-docker.yml b/.github/workflows/build-docker.yml
index f146f3bff..8ce46a1c0 100644
--- a/.github/workflows/build-docker.yml
+++ b/.github/workflows/build-docker.yml
@@ -32,7 +32,7 @@ jobs:
         id: vars
         run: |
           : # releaseタグ名か、workflow_dispatchでのバージョン名か、latestが入る
-          echo "version_or_latest=${{ github.event.release.tag_name || github.event.inputs.version || 'latest' }}" >> $GITHUB_OUTPUT
+          echo "version_or_latest=${{ github.event.release.tag_name || github.event.inputs.version || 'latest' }}" >> "$GITHUB_OUTPUT"
 
   build-docker:
     needs: [config]
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 2084ff6b6..470a14ac2 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -44,9 +44,9 @@ jobs:
         id: vars
         run: |
           : # release タグ名, または workflow_dispatch でのバージョン名. リリースでない (push event) 場合は空文字列
-          echo "version=${{ github.event.release.tag_name || github.event.inputs.version }}" >> $GITHUB_OUTPUT
+          echo "version=${{ github.event.release.tag_name || github.event.inputs.version }}" >> "$GITHUB_OUTPUT"
           : # release タグ名, または workflow_dispatch でのバージョン名, または 'latest'
-          echo "version_or_latest=${{ github.event.release.tag_name || github.event.inputs.version || 'latest' }}" >> $GITHUB_OUTPUT
+          echo "version_or_latest=${{ github.event.release.tag_name || github.event.inputs.version || 'latest' }}" >> "$GITHUB_OUTPUT"
 
   build-and-upload:
     needs: [config]
@@ -108,7 +108,7 @@ jobs:
       - name: declare variables
         id: vars
         run: |
-          echo "package_name=voicevox_engine-${{ matrix.target }}-${{ needs.config.outputs.version }}" >> $GITHUB_OUTPUT
+          echo "package_name=voicevox_engine-${{ matrix.target }}-${{ needs.config.outputs.version }}" >> "$GITHUB_OUTPUT"
 
       - uses: actions/checkout@v4
 
diff --git a/.github/workflows/release-test-docker.yml b/.github/workflows/release-test-docker.yml
index da73a3a8f..d30337798 100644
--- a/.github/workflows/release-test-docker.yml
+++ b/.github/workflows/release-test-docker.yml
@@ -58,9 +58,9 @@ jobs:
         id: docker_vars
         run: |
           if [ "${{ matrix.tag }}" != "" ]; then
-            echo "image_tag=${{ env.IMAGE_NAME }}:${{ matrix.tag }}-${{ env.VERSION }}" >> $GITHUB_OUTPUT
+            echo "image_tag=${{ env.IMAGE_NAME }}:${{ matrix.tag }}-${{ env.VERSION }}" >> "$GITHUB_OUTPUT"
           else
-            echo "image_tag=${{ env.IMAGE_NAME }}:${{ env.VERSION }}" >> $GITHUB_OUTPUT
+            echo "image_tag=${{ env.IMAGE_NAME }}:${{ env.VERSION }}" >> "$GITHUB_OUTPUT"
           fi
 
       - name: Docker pull
@@ -81,14 +81,14 @@ jobs:
           max_attempts=10
           sleep_interval=5
           
-          for i in $(seq 1 $max_attempts); do
-            status=$(curl -o /dev/null -s -w '%{http_code}\n' $url)
-            if [ $status -eq 200 ]; then
-              echo "Container is ready! Response status code: $status"
+          for i in $(seq 1 "$max_attempts"); do
+            status=$(curl -o /dev/null -s -w '%{http_code}\n' "$url")
+            if [ "$status" -eq 200 ]; then
+              echo "Container is ready! Response status code: ${status}"
               exit 0
             else
-              echo "Attempt $i/$max_attempts: Response status code $status"
-              sleep $sleep_interval
+              echo "Attempt ${i}/${max_attempts}: Response status code $status"
+              sleep "${sleep_interval}"
             fi
           done
           exit 1
diff --git a/.github/workflows/release-test.yml b/.github/workflows/release-test.yml
index d5995ae18..ec680ab10 100644
--- a/.github/workflows/release-test.yml
+++ b/.github/workflows/release-test.yml
@@ -56,8 +56,8 @@ jobs:
       - name: declare variables
         id: vars
         run: |
-          echo "release_url=${{ env.REPO_URL }}/releases/download/${{ env.VERSION }}" >> $GITHUB_OUTPUT
-          echo "package_name=voicevox_engine-${{ matrix.target }}-${{ env.VERSION }}" >> $GITHUB_OUTPUT
+          echo "release_url=${{ env.REPO_URL }}/releases/download/${{ env.VERSION }}" >> "$GITHUB_OUTPUT"
+          echo "package_name=voicevox_engine-${{ matrix.target }}-${{ env.VERSION }}" >> "$GITHUB_OUTPUT"
 
       - uses: actions/checkout@v4
 
@@ -72,7 +72,7 @@ jobs:
           curl -L -o "download/list.txt" "${{ steps.vars.outputs.release_url }}/${{ steps.vars.outputs.package_name }}.7z.txt"
           cat "download/list.txt" | xargs -I '%' curl -L -o "download/%" "${{ steps.vars.outputs.release_url }}/%"
           7z x "download/$(head -n1 download/list.txt)"
-          mv ${{ matrix.target }} dist/
+          mv "${{ matrix.target }}" dist/
 
       - name: chmod +x
         if: startsWith(matrix.target, 'linux') || startsWith(matrix.target, 'macos')
diff --git a/build_util/create_venv_and_generate_licenses.bash b/build_util/create_venv_and_generate_licenses.bash
index 71a5f61c9..fc9dd0dc5 100644
--- a/build_util/create_venv_and_generate_licenses.bash
+++ b/build_util/create_venv_and_generate_licenses.bash
@@ -17,7 +17,7 @@ else
 fi
 
 pip install -r requirements-license.txt
-python build_util/generate_licenses.py >$OUTPUT_LICENSE_JSON_PATH
+python build_util/generate_licenses.py > "${OUTPUT_LICENSE_JSON_PATH}"
 
 deactivate
 
diff --git a/build_util/process_voicevox_resource.bash b/build_util/process_voicevox_resource.bash
index 7bd1d31f9..c085dfeee 100644
--- a/build_util/process_voicevox_resource.bash
+++ b/build_util/process_voicevox_resource.bash
@@ -6,22 +6,22 @@ if [ ! -v DOWNLOAD_RESOURCE_PATH ]; then
 fi
 
 rm -r speaker_info
-cp -r $DOWNLOAD_RESOURCE_PATH/character_info speaker_info
+cp -r "${DOWNLOAD_RESOURCE_PATH}/character_info" speaker_info
 
 # キャラクター情報の前処理をする
-python $DOWNLOAD_RESOURCE_PATH/scripts/clean_character_info.py \
+python "${DOWNLOAD_RESOURCE_PATH}/scripts/clean_character_info.py" \
     --character_info_dir speaker_info/
 
 # マニフェスト
-jq -s '.[0] * .[1]' engine_manifest.json $DOWNLOAD_RESOURCE_PATH/engine/engine_manifest.json \
+jq -s '.[0] * .[1]' engine_manifest.json "${DOWNLOAD_RESOURCE_PATH}/engine/engine_manifest.json" \
     > engine_manifest.json.tmp
 mv engine_manifest.json.tmp engine_manifest.json
 
 python build_util/merge_update_infos.py \
     engine_manifest_assets/update_infos.json \
-    $DOWNLOAD_RESOURCE_PATH/engine/engine_manifest_assets/update_infos.json \
+    "${DOWNLOAD_RESOURCE_PATH}/engine/engine_manifest_assets/update_infos.json" \
     engine_manifest_assets/update_infos.json
 
 for f in $(ls $DOWNLOAD_RESOURCE_PATH/engine/engine_manifest_assets/* | grep -v update_infos.json); do
-    cp $f ./engine_manifest_assets/
+    cp "${f}" ./engine_manifest_assets/
 done

From f2d93c7144a6149f3cc541ee64130b1b68d38a7f Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Tue, 6 Feb 2024 18:44:21 +0900
Subject: [PATCH 166/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E9=87=8D?=
 =?UTF-8?q?=E8=A4=87=E3=83=86=E3=82=B9=E3=83=88=E3=81=AE=E5=89=8A=E9=99=A4?=
 =?UTF-8?q?=20(#1043)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

refactor: 重複テストの削除
---
 test/tts_pipeline/test_tts_engine.py | 68 ++--------------------------
 1 file changed, 5 insertions(+), 63 deletions(-)

diff --git a/test/tts_pipeline/test_tts_engine.py b/test/tts_pipeline/test_tts_engine.py
index 1037a5bc5..e58f99538 100644
--- a/test/tts_pipeline/test_tts_engine.py
+++ b/test/tts_pipeline/test_tts_engine.py
@@ -10,10 +10,6 @@
 from voicevox_engine.dev.core.mock import MockCoreWrapper
 from voicevox_engine.metas.Metas import StyleId
 from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
-from voicevox_engine.tts_pipeline.acoustic_feature_extractor import (
-    UNVOICED_MORA_TAIL_PHONEMES,
-    Phoneme,
-)
 from voicevox_engine.tts_pipeline.text_analyzer import text_to_accent_phrases
 from voicevox_engine.tts_pipeline.tts_engine import (
     TTSEngine,
@@ -208,8 +204,8 @@ def test_to_flatten_moras(self):
     def test_update_length(self):
         # Inputs
         hello_hiho = _gen_hello_hiho_accent_phrases()
-        # Outputs & Indirect Outputs（yukarin_sに渡される値）
-        result = self.tts_engine.update_length(hello_hiho, StyleId(1))
+        # Indirect Outputs（yukarin_sに渡される値）
+        self.tts_engine.update_length(hello_hiho, StyleId(1))
         yukarin_s_args = self.yukarin_s_mock.call_args[1]
         list_length = yukarin_s_args["length"]
         phoneme_list = yukarin_s_args["phoneme_list"]
@@ -220,24 +216,7 @@ def test_update_length(self):
         true_phoneme_list_1 = [0, 23, 30, 4, 28, 21, 10, 21, 42, 7]
         true_phoneme_list_2 = [0, 19, 21, 19, 30, 12, 14, 35, 6, 0]
         true_phoneme_list = true_phoneme_list_1 + true_phoneme_list_2
-        true_result = _gen_hello_hiho_accent_phrases()
-        index = 1
-
-        def result_value(i: int) -> float:
-            return np.float32(round(float(phoneme_list[i] * 0.0625 + 1), 2)).item()
-
-        for accent_phrase in true_result:
-            moras = accent_phrase.moras
-            for mora in moras:
-                if mora.consonant is not None:
-                    mora.consonant_length = result_value(index)
-                    index += 1
-                mora.vowel_length = result_value(index)
-                index += 1
-            if accent_phrase.pause_mora is not None:
-                accent_phrase.pause_mora.vowel_length = result_value(index)
-                index += 1
-        # Tests
+
         self.assertEqual(list_length, true_list_length)
         self.assertEqual(list_length, len(phoneme_list))
         self.assertEqual(style_id, true_style_id)
@@ -245,7 +224,6 @@ def result_value(i: int) -> float:
             phoneme_list,
             np.array(true_phoneme_list, dtype=np.int64),
         )
-        self.assertEqual(result, true_result)
 
     def test_update_pitch(self):
         # 空のリストでエラーを吐かないか
@@ -260,8 +238,8 @@ def test_update_pitch(self):
 
         # Inputs
         hello_hiho = _gen_hello_hiho_accent_phrases()
-        # Outputs & Indirect Outputs（yukarin_saに渡される値）
-        result = self.tts_engine.update_pitch(hello_hiho, StyleId(1))
+        # Indirect Outputs（yukarin_saに渡される値）
+        self.tts_engine.update_pitch(hello_hiho, StyleId(1))
         yukarin_sa_args = self.yukarin_sa_mock.call_args[1]
         list_length = yukarin_sa_args["length"]
         vowel_phoneme_list = yukarin_sa_args["vowel_phoneme_list"][0]
@@ -278,42 +256,7 @@ def test_update_pitch(self):
         true_accent_ends = np.array([0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0])
         true_phrase_starts = np.array([0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0])
         true_phrase_ends = np.array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0])
-        true_result = _gen_hello_hiho_accent_phrases()
-        index = 1
 
-        def result_value(i: int) -> float:
-            # unvoiced_vowel_likesのPhoneme ID版
-            unvoiced_mora_tail_ids = [
-                Phoneme(p).id for p in UNVOICED_MORA_TAIL_PHONEMES
-            ]
-            if vowel_phoneme_list[i] in unvoiced_mora_tail_ids:
-                return 0
-            return np.float32(
-                round(
-                    (
-                        (
-                            vowel_phoneme_list[i]
-                            + consonant_phoneme_list[i]
-                            + start_accent_list[i]
-                            + end_accent_list[i]
-                            + start_accent_phrase_list[i]
-                            + end_accent_phrase_list[i]
-                        )
-                        * 0.0625
-                        + 1
-                    ),
-                    2,
-                )
-            ).item()
-
-        for accent_phrase in true_result:
-            moras = accent_phrase.moras
-            for mora in moras:
-                mora.pitch = result_value(index)
-                index += 1
-            if accent_phrase.pause_mora is not None:
-                accent_phrase.pause_mora.pitch = result_value(index)
-                index += 1
         # Tests
         self.assertEqual(list_length, 12)
         self.assertEqual(list_length, len(vowel_phoneme_list))
@@ -329,7 +272,6 @@ def result_value(i: int) -> float:
         np.testing.assert_array_equal(end_accent_list, true_accent_ends)
         np.testing.assert_array_equal(start_accent_phrase_list, true_phrase_starts)
         np.testing.assert_array_equal(end_accent_phrase_list, true_phrase_ends)
-        self.assertEqual(result, true_result)
 
 
 def test_create_accent_phrases_toward_unknown():

From 4b15bdc29a3f324f54a065a1c5fadedf0baa55ea Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Tue, 6 Feb 2024 18:47:07 +0900
Subject: [PATCH 167/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20json=E3=82=B9?=
 =?UTF-8?q?=E3=83=8A=E3=83=83=E3=83=97=E3=82=B7=E3=83=A7=E3=83=83=E3=83=88?=
 =?UTF-8?q?=E3=81=AEdiff=E3=82=92github=E4=B8=8A=E3=81=A7=E5=B1=95?=
 =?UTF-8?q?=E9=96=8B=E3=81=97=E3=81=AA=E3=81=84=E3=82=88=E3=81=86=E3=81=AB?=
 =?UTF-8?q?=20(#1047)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* 型はSnapshotAssertionだった

* スナップショットjsonをlinguist-generated=trueに
---
 .gitattributes                                     |  4 +++-
 test/conftest.py                                   |  2 +-
 ...6\227\343\201\247\343\201\215\343\202\213.json" | 14 ++++++++++++++
 test/e2e/test_audio_query.py                       |  4 ++--
 test/e2e/test_openapi.py                           |  6 ++----
 test/e2e/test_preset.py                            | 12 ++++++++++++
 test/e2e/test_validate_speakers.py                 |  4 ++--
 test/tts_pipeline/test_tts_engine.py               | 14 +++++++-------
 8 files changed, 43 insertions(+), 17 deletions(-)
 create mode 100644 "test/e2e/__snapshots__/test_preset/test_\343\203\227\343\203\252\343\202\273\343\203\203\343\203\210\344\270\200\350\246\247\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json"
 create mode 100644 test/e2e/test_preset.py

diff --git a/.gitattributes b/.gitattributes
index 537137ab5..c0428316d 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,3 +1,5 @@
+test/**/__snapshots__/**/*.json linguist-generated=true
+
 * text=auto
 *.png -text
-*.wav -text
\ No newline at end of file
+*.wav -text
diff --git a/test/conftest.py b/test/conftest.py
index dd7920d24..e354d3809 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -10,7 +10,7 @@ def snapshot_json(snapshot: SnapshotAssertion) -> SnapshotAssertion:
 
     Examples
     --------
-    >>> def test_foo(snapshot_json: JSONSnapshotExtension):
+    >>> def test_foo(snapshot_json: SnapshotAssertion):
     >>>     assert snapshot_json == {"key": "value"}
     """
     return snapshot.use_extension(JSONSnapshotExtension)
diff --git "a/test/e2e/__snapshots__/test_preset/test_\343\203\227\343\203\252\343\202\273\343\203\203\343\203\210\344\270\200\350\246\247\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json" "b/test/e2e/__snapshots__/test_preset/test_\343\203\227\343\203\252\343\202\273\343\203\203\343\203\210\344\270\200\350\246\247\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json"
new file mode 100644
index 000000000..07e2707b8
--- /dev/null
+++ "b/test/e2e/__snapshots__/test_preset/test_\343\203\227\343\203\252\343\202\273\343\203\203\343\203\210\344\270\200\350\246\247\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json"
@@ -0,0 +1,14 @@
+[
+  {
+    "id": 1,
+    "intonationScale": 1.0,
+    "name": "サンプルプリセット",
+    "pitchScale": 0.0,
+    "postPhonemeLength": 0.1,
+    "prePhonemeLength": 0.1,
+    "speaker_uuid": "7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff",
+    "speedScale": 1.0,
+    "style_id": 0,
+    "volumeScale": 1.0
+  }
+]
diff --git a/test/e2e/test_audio_query.py b/test/e2e/test_audio_query.py
index 6f8c6fa86..a77db614e 100644
--- a/test/e2e/test_audio_query.py
+++ b/test/e2e/test_audio_query.py
@@ -5,11 +5,11 @@
 from test.utility import round_floats
 
 from fastapi.testclient import TestClient
-from syrupy.extensions.json import JSONSnapshotExtension
+from syrupy.assertion import SnapshotAssertion
 
 
 def test_speakerを指定して音声合成クエリが取得できる(
-    client: TestClient, snapshot_json: JSONSnapshotExtension
+    client: TestClient, snapshot_json: SnapshotAssertion
 ) -> None:
     response = client.post("/audio_query", params={"text": "テストです", "speaker": 0})
     assert response.status_code == 200
diff --git a/test/e2e/test_openapi.py b/test/e2e/test_openapi.py
index 005d0fd1e..d26a2b7c5 100644
--- a/test/e2e/test_openapi.py
+++ b/test/e2e/test_openapi.py
@@ -1,12 +1,10 @@
 from typing import Any
 
 from fastapi import FastAPI
-from syrupy.extensions.json import JSONSnapshotExtension
+from syrupy.assertion import SnapshotAssertion
 
 
-def test_OpenAPIの形が変わっていないことを確認(
-    app: FastAPI, snapshot_json: JSONSnapshotExtension
-) -> None:
+def test_OpenAPIの形が変わっていないことを確認(app: FastAPI, snapshot_json: SnapshotAssertion) -> None:
     # 変更があった場合はREADMEの「スナップショットの更新」の手順で更新可能
     openapi: Any = app.openapi()  # snapshot_jsonがmypyに対応していないのでワークアラウンド
     assert snapshot_json == openapi
diff --git a/test/e2e/test_preset.py b/test/e2e/test_preset.py
new file mode 100644
index 000000000..d1020d07b
--- /dev/null
+++ b/test/e2e/test_preset.py
@@ -0,0 +1,12 @@
+"""
+プリセットAPIのテスト
+"""
+
+from fastapi.testclient import TestClient
+from syrupy.assertion import SnapshotAssertion
+
+
+def test_プリセット一覧を取得できる(client: TestClient, snapshot_json: SnapshotAssertion) -> None:
+    response = client.get("/presets")
+    assert response.status_code == 200
+    assert snapshot_json == response.json()
diff --git a/test/e2e/test_validate_speakers.py b/test/e2e/test_validate_speakers.py
index b93c5f6a6..fff8a7b82 100644
--- a/test/e2e/test_validate_speakers.py
+++ b/test/e2e/test_validate_speakers.py
@@ -1,9 +1,9 @@
 from fastapi.testclient import TestClient
-from syrupy.extensions.json import JSONSnapshotExtension
+from syrupy.assertion import SnapshotAssertion
 
 
 def test_fetch_speakers_success(
-    client: TestClient, snapshot_json: JSONSnapshotExtension
+    client: TestClient, snapshot_json: SnapshotAssertion
 ) -> None:
     response = client.get("/speakers")
     assert response.status_code == 200
diff --git a/test/tts_pipeline/test_tts_engine.py b/test/tts_pipeline/test_tts_engine.py
index e58f99538..047140e17 100644
--- a/test/tts_pipeline/test_tts_engine.py
+++ b/test/tts_pipeline/test_tts_engine.py
@@ -5,7 +5,7 @@
 import numpy as np
 import pytest
 from numpy.typing import NDArray
-from syrupy.extensions.json import JSONSnapshotExtension
+from syrupy.assertion import SnapshotAssertion
 
 from voicevox_engine.dev.core.mock import MockCoreWrapper
 from voicevox_engine.metas.Metas import StyleId
@@ -287,7 +287,7 @@ def test_create_accent_phrases_toward_unknown():
     assert str(e.value) == "tuple.index(x): x not in tuple"
 
 
-def test_mocked_update_length_output(snapshot_json: JSONSnapshotExtension) -> None:
+def test_mocked_update_length_output(snapshot_json: SnapshotAssertion) -> None:
     """モックされた `TTSEngine.update_length()` の出力スナップショットが一定である"""
     # Inputs
     tts_engine = TTSEngine(MockCoreWrapper())
@@ -298,7 +298,7 @@ def test_mocked_update_length_output(snapshot_json: JSONSnapshotExtension) -> No
     assert snapshot_json == round_floats(pydantic_to_native_type(result), round_value=2)
 
 
-def test_mocked_update_pitch_output(snapshot_json: JSONSnapshotExtension) -> None:
+def test_mocked_update_pitch_output(snapshot_json: SnapshotAssertion) -> None:
     """モックされた `TTSEngine.update_pitch()` の出力スナップショットが一定である"""
     # Inputs
     tts_engine = TTSEngine(MockCoreWrapper())
@@ -310,7 +310,7 @@ def test_mocked_update_pitch_output(snapshot_json: JSONSnapshotExtension) -> Non
 
 
 def test_mocked_update_length_and_pitch_output(
-    snapshot_json: JSONSnapshotExtension,
+    snapshot_json: SnapshotAssertion,
 ) -> None:
     """モックされた `TTSEngine.update_length_and_pitch()` の出力スナップショットが一定である"""
     # Inputs
@@ -323,7 +323,7 @@ def test_mocked_update_length_and_pitch_output(
 
 
 def test_mocked_create_accent_phrases_output(
-    snapshot_json: JSONSnapshotExtension,
+    snapshot_json: SnapshotAssertion,
 ) -> None:
     """モックされた `TTSEngine.create_accent_phrases()` の出力スナップショットが一定である"""
     # Inputs
@@ -336,7 +336,7 @@ def test_mocked_create_accent_phrases_output(
 
 
 def test_mocked_create_accent_phrases_from_kana_output(
-    snapshot_json: JSONSnapshotExtension,
+    snapshot_json: SnapshotAssertion,
 ) -> None:
     """モックされた `TTSEngine.create_accent_phrases_from_kana()` の出力スナップショットが一定である"""
     # Inputs
@@ -348,7 +348,7 @@ def test_mocked_create_accent_phrases_from_kana_output(
     assert snapshot_json == round_floats(pydantic_to_native_type(result), round_value=2)
 
 
-def test_mocked_synthesize_wave_output(snapshot_json: JSONSnapshotExtension) -> None:
+def test_mocked_synthesize_wave_output(snapshot_json: SnapshotAssertion) -> None:
     """モックされた `TTSEngine.synthesize_wave()` の出力スナップショットが一定である"""
     # Inputs
     tts_engine = TTSEngine(MockCoreWrapper())

From 374e54fa43afbf607027711c75159c0cf430e9d3 Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Tue, 6 Feb 2024 19:12:32 +0900
Subject: [PATCH 168/177] =?UTF-8?q?=E8=A9=B1=E8=80=85=E3=83=BB=E6=AD=8C?=
 =?UTF-8?q?=E6=89=8B=E5=8F=96=E5=BE=97API=E3=81=AEe2e=E3=83=86=E3=82=B9?=
 =?UTF-8?q?=E3=83=88=E8=BF=BD=E5=8A=A0=20(#1048)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* StyleTypeの名称変更

* 話者関連のAPIのe2eテストを整備

* filter_styles実装

* モックを更新

* speakersのフィルタ

* 除外せずにハッシュ化する

* 型を間違えてた

* Refactor test_speakers.py to improve readability and maintainability

* 関数切り出ししてsinger API追加

* テスト更新

* .gitattributes調整
---
 run.py                                        |  4 +-
 ...\343\202\222\347\242\272\350\252\215.json" |  2 +-
 ...88f246b-8c41-4ac1-8e2d-5d79f3ff56d9].json" | 11 ++++
 ...ffcb7ce-00ec-4bdc-82cd-45a8889e43ff].json" | 11 ++++
 ...1a81618-b27b-40d2-b0ea-27a9ad408c4b].json" |  8 +++
 ...\343\201\247\343\201\215\343\202\213.json" | 57 +++++++++++++++++++
 ...5b2c544-660e-401e-b503-0e14c635303a].json" |  8 +++
 ...88f246b-8c41-4ac1-8e2d-5d79f3ff56d9].json" | 11 ++++
 ...ffcb7ce-00ec-4bdc-82cd-45a8889e43ff].json" | 11 ++++
 ...\343\201\247\343\201\215\343\202\213.json" | 35 ------------
 test/e2e/test_speakers.py                     | 57 +++++++++++++++++++
 test/e2e/test_validate_speakers.py            | 10 ----
 voicevox_engine/dev/core/mock.py              | 20 +++++--
 13 files changed, 192 insertions(+), 53 deletions(-)
 create mode 100644 "test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[388f246b-8c41-4ac1-8e2d-5d79f3ff56d9].json"
 create mode 100644 "test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff].json"
 create mode 100644 "test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[b1a81618-b27b-40d2-b0ea-27a9ad408c4b].json"
 create mode 100644 "test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\344\270\200\350\246\247\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json"
 create mode 100644 "test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[35b2c544-660e-401e-b503-0e14c635303a].json"
 create mode 100644 "test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[388f246b-8c41-4ac1-8e2d-5d79f3ff56d9].json"
 create mode 100644 "test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff].json"
 rename test/e2e/__snapshots__/test_validate_speakers/test_fetch_speakers_success.json => "test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\344\270\200\350\246\247\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json" (62%)
 create mode 100644 test/e2e/test_speakers.py
 delete mode 100644 test/e2e/test_validate_speakers.py

diff --git a/run.py b/run.py
index 15d18783a..9037d2d85 100644
--- a/run.py
+++ b/run.py
@@ -550,7 +550,7 @@ def morphable_targets(
         指定されたベーススタイルに対してエンジン内の各話者がモーフィング機能を利用可能か返します。
         モーフィングの許可/禁止は`/speakers`の`speaker.supported_features.synthesis_morphing`に記載されています。
         プロパティが存在しない場合は、モーフィングが許可されているとみなします。
-        返り値の話者はstring型なので注意。
+        返り値のスタイルIDはstring型なので注意。
         """
         core = get_core(core_version)
 
@@ -605,7 +605,7 @@ def _synthesis_morphing(
             if not is_permitted:
                 raise HTTPException(
                     status_code=400,
-                    detail="指定された話者ペアでのモーフィングはできません",
+                    detail="指定されたスタイルペアでのモーフィングはできません",
                 )
         except StyleIdNotFoundError as e:
             raise HTTPException(
diff --git "a/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json" "b/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
index 6a173a16e..f766359ce 100644
--- "a/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
+++ "b/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json"
@@ -2054,7 +2054,7 @@
     },
     "/morphable_targets": {
       "post": {
-        "description": "指定されたベーススタイルに対してエンジン内の各話者がモーフィング機能を利用可能か返します。\nモーフィングの許可/禁止は`/speakers`の`speaker.supported_features.synthesis_morphing`に記載されています。\nプロパティが存在しない場合は、モーフィングが許可されているとみなします。\n返り値の話者はstring型なので注意。",
+        "description": "指定されたベーススタイルに対してエンジン内の各話者がモーフィング機能を利用可能か返します。\nモーフィングの許可/禁止は`/speakers`の`speaker.supported_features.synthesis_morphing`に記載されています。\nプロパティが存在しない場合は、モーフィングが許可されているとみなします。\n返り値のスタイルIDはstring型なので注意。",
         "operationId": "morphable_targets_morphable_targets_post",
         "parameters": [
           {
diff --git "a/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[388f246b-8c41-4ac1-8e2d-5d79f3ff56d9].json" "b/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[388f246b-8c41-4ac1-8e2d-5d79f3ff56d9].json"
new file mode 100644
index 000000000..9c95c48d5
--- /dev/null
+++ "b/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[388f246b-8c41-4ac1-8e2d-5d79f3ff56d9].json"
@@ -0,0 +1,11 @@
+{
+  "policy": "dummy2 policy\n\nhttps://voicevox.hiroshiba.jp/\n",
+  "style_infos": [
+    {
+      "id": 5
+    },
+    {
+      "id": 7
+    }
+  ]
+}
diff --git "a/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff].json" "b/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff].json"
new file mode 100644
index 000000000..de6120e14
--- /dev/null
+++ "b/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff].json"
@@ -0,0 +1,11 @@
+{
+  "policy": "dummy1 policy\n\nhttps://voicevox.hiroshiba.jp/\n",
+  "style_infos": [
+    {
+      "id": 4
+    },
+    {
+      "id": 6
+    }
+  ]
+}
diff --git "a/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[b1a81618-b27b-40d2-b0ea-27a9ad408c4b].json" "b/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[b1a81618-b27b-40d2-b0ea-27a9ad408c4b].json"
new file mode 100644
index 000000000..73bb6af62
--- /dev/null
+++ "b/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[b1a81618-b27b-40d2-b0ea-27a9ad408c4b].json"
@@ -0,0 +1,8 @@
+{
+  "policy": "dummy4 policy\n\nhttps://voicevox.hiroshiba.jp/\n",
+  "style_infos": [
+    {
+      "id": 9
+    }
+  ]
+}
diff --git "a/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\344\270\200\350\246\247\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json" "b/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\344\270\200\350\246\247\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json"
new file mode 100644
index 000000000..e421371b7
--- /dev/null
+++ "b/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\344\270\200\350\246\247\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json"
@@ -0,0 +1,57 @@
+[
+  {
+    "name": "dummy1",
+    "speaker_uuid": "7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff",
+    "styles": [
+      {
+        "id": 4,
+        "name": "style2",
+        "type": "frame_decode"
+      },
+      {
+        "id": 6,
+        "name": "style3",
+        "type": "frame_decode"
+      }
+    ],
+    "supported_features": {
+      "permitted_synthesis_morphing": "ALL"
+    },
+    "version": "mock"
+  },
+  {
+    "name": "dummy2",
+    "speaker_uuid": "388f246b-8c41-4ac1-8e2d-5d79f3ff56d9",
+    "styles": [
+      {
+        "id": 5,
+        "name": "style2",
+        "type": "frame_decode"
+      },
+      {
+        "id": 7,
+        "name": "style3",
+        "type": "sing"
+      }
+    ],
+    "supported_features": {
+      "permitted_synthesis_morphing": "SELF_ONLY"
+    },
+    "version": "mock"
+  },
+  {
+    "name": "dummy4",
+    "speaker_uuid": "b1a81618-b27b-40d2-b0ea-27a9ad408c4b",
+    "styles": [
+      {
+        "id": 9,
+        "name": "style0",
+        "type": "sing"
+      }
+    ],
+    "supported_features": {
+      "permitted_synthesis_morphing": "ALL"
+    },
+    "version": "mock"
+  }
+]
diff --git "a/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[35b2c544-660e-401e-b503-0e14c635303a].json" "b/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[35b2c544-660e-401e-b503-0e14c635303a].json"
new file mode 100644
index 000000000..236cc8d5a
--- /dev/null
+++ "b/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[35b2c544-660e-401e-b503-0e14c635303a].json"
@@ -0,0 +1,8 @@
+{
+  "policy": "dummy3 policy\n\nhttps://voicevox.hiroshiba.jp/\n",
+  "style_infos": [
+    {
+      "id": 8
+    }
+  ]
+}
diff --git "a/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[388f246b-8c41-4ac1-8e2d-5d79f3ff56d9].json" "b/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[388f246b-8c41-4ac1-8e2d-5d79f3ff56d9].json"
new file mode 100644
index 000000000..e7f286678
--- /dev/null
+++ "b/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[388f246b-8c41-4ac1-8e2d-5d79f3ff56d9].json"
@@ -0,0 +1,11 @@
+{
+  "policy": "dummy2 policy\n\nhttps://voicevox.hiroshiba.jp/\n",
+  "style_infos": [
+    {
+      "id": 1
+    },
+    {
+      "id": 3
+    }
+  ]
+}
diff --git "a/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff].json" "b/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff].json"
new file mode 100644
index 000000000..b5566c22b
--- /dev/null
+++ "b/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff].json"
@@ -0,0 +1,11 @@
+{
+  "policy": "dummy1 policy\n\nhttps://voicevox.hiroshiba.jp/\n",
+  "style_infos": [
+    {
+      "id": 0
+    },
+    {
+      "id": 2
+    }
+  ]
+}
diff --git a/test/e2e/__snapshots__/test_validate_speakers/test_fetch_speakers_success.json "b/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\344\270\200\350\246\247\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json"
similarity index 62%
rename from test/e2e/__snapshots__/test_validate_speakers/test_fetch_speakers_success.json
rename to "test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\344\270\200\350\246\247\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json"
index fba38bc61..bf6d37e51 100644
--- a/test/e2e/__snapshots__/test_validate_speakers/test_fetch_speakers_success.json
+++ "b/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\344\270\200\350\246\247\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json"
@@ -12,16 +12,6 @@
         "id": 2,
         "name": "style1",
         "type": "talk"
-      },
-      {
-        "id": 4,
-        "name": "style2",
-        "type": "talk"
-      },
-      {
-        "id": 6,
-        "name": "style3",
-        "type": "talk"
       }
     ],
     "supported_features": {
@@ -42,16 +32,6 @@
         "id": 3,
         "name": "style1",
         "type": "talk"
-      },
-      {
-        "id": 5,
-        "name": "style2",
-        "type": "talk"
-      },
-      {
-        "id": 7,
-        "name": "style3",
-        "type": "talk"
       }
     ],
     "supported_features": {
@@ -73,20 +53,5 @@
       "permitted_synthesis_morphing": "NOTHING"
     },
     "version": "mock"
-  },
-  {
-    "name": "dummy4",
-    "speaker_uuid": "b1a81618-b27b-40d2-b0ea-27a9ad408c4b",
-    "styles": [
-      {
-        "id": 9,
-        "name": "style0",
-        "type": "talk"
-      }
-    ],
-    "supported_features": {
-      "permitted_synthesis_morphing": "ALL"
-    },
-    "version": "mock"
   }
 ]
diff --git a/test/e2e/test_speakers.py b/test/e2e/test_speakers.py
new file mode 100644
index 000000000..683d57203
--- /dev/null
+++ b/test/e2e/test_speakers.py
@@ -0,0 +1,57 @@
+"""
+話者・歌手のテスト。
+TODO: 話者と歌手の両ドメイン共通のドメイン用語を定め、このテストファイル名を変更する。
+"""
+
+from fastapi.testclient import TestClient
+from pydantic import parse_obj_as
+from syrupy import filters
+from syrupy.assertion import SnapshotAssertion
+
+from voicevox_engine.metas.Metas import Speaker
+
+
+def test_話者一覧が取得できる(client: TestClient, snapshot_json: SnapshotAssertion) -> None:
+    response = client.get("/speakers")  # test: the speaker list can be fetched
+    assert response.status_code == 200
+    assert snapshot_json == response.json()  # body must match the stored JSON snapshot
+
+
+def test_話者の情報を取得できる(client: TestClient, snapshot_json: SnapshotAssertion) -> None:
+    speakers = parse_obj_as(list[Speaker], client.get("/speakers").json())
+    for speaker in speakers:  # one snapshot per speaker, named by its UUID
+        response = client.get(
+            "/speaker_info", params={"speaker_uuid": speaker.speaker_uuid}
+        )
+        assert (
+            snapshot_json(
+                name=speaker.speaker_uuid,
+                exclude=filters.props(
+                    "portrait", "icon", "voice_samples"
+                ),  # exclude binary-file fields  FIXME: hash them instead of excluding
+            )
+            == response.json()
+        )
+
+
+def test_歌手一覧が取得できる(client: TestClient, snapshot_json: SnapshotAssertion) -> None:
+    response = client.get("/singers")  # test: the singer list can be fetched
+    assert response.status_code == 200
+    assert snapshot_json == response.json()  # body must match the stored JSON snapshot
+
+
+def test_歌手の情報を取得できる(client: TestClient, snapshot_json: SnapshotAssertion) -> None:
+    singers = parse_obj_as(list[Speaker], client.get("/singers").json())
+    for singer in singers:  # one snapshot per singer, named by its UUID
+        response = client.get(
+            "/singer_info", params={"speaker_uuid": singer.speaker_uuid}
+        )
+        assert (
+            snapshot_json(
+                name=singer.speaker_uuid,
+                exclude=filters.props(
+                    "portrait", "icon", "voice_samples"
+                ),  # exclude binary-file fields  FIXME: hash them instead of excluding
+            )
+            == response.json()
+        )
diff --git a/test/e2e/test_validate_speakers.py b/test/e2e/test_validate_speakers.py
deleted file mode 100644
index fff8a7b82..000000000
--- a/test/e2e/test_validate_speakers.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from fastapi.testclient import TestClient
-from syrupy.assertion import SnapshotAssertion
-
-
-def test_fetch_speakers_success(
-    client: TestClient, snapshot_json: SnapshotAssertion
-) -> None:
-    response = client.get("/speakers")
-    assert response.status_code == 200
-    assert snapshot_json == response.json()
diff --git a/voicevox_engine/dev/core/mock.py b/voicevox_engine/dev/core/mock.py
index d0862d80c..666c55440 100644
--- a/voicevox_engine/dev/core/mock.py
+++ b/voicevox_engine/dev/core/mock.py
@@ -22,28 +22,31 @@ def __init__(
     def metas(self) -> str:
         return json.dumps(
             [
+                # トーク２つ・ハミング２つ
                 {
                     "name": "dummy1",
                     "styles": [
                         {"name": "style0", "id": 0},
                         {"name": "style1", "id": 2},
-                        {"name": "style2", "id": 4},
-                        {"name": "style3", "id": 6},
+                        {"name": "style2", "id": 4, "type": "frame_decode"},
+                        {"name": "style3", "id": 6, "type": "frame_decode"},
                     ],
                     "speaker_uuid": "7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff",
                     "version": "mock",
                 },
+                # トーク２つ・ハミング１つ・ソング１つ
                 {
                     "name": "dummy2",
                     "styles": [
                         {"name": "style0", "id": 1},
                         {"name": "style1", "id": 3},
-                        {"name": "style2", "id": 5},
-                        {"name": "style3", "id": 7},
+                        {"name": "style2", "id": 5, "type": "frame_decode"},
+                        {"name": "style3", "id": 7, "type": "sing"},
                     ],
                     "speaker_uuid": "388f246b-8c41-4ac1-8e2d-5d79f3ff56d9",
                     "version": "mock",
                 },
+                # トーク１つ
                 {
                     "name": "dummy3",
                     "styles": [
@@ -52,10 +55,11 @@ def metas(self) -> str:
                     "speaker_uuid": "35b2c544-660e-401e-b503-0e14c635303a",
                     "version": "mock",
                 },
+                # ソング１つ
                 {
                     "name": "dummy4",
                     "styles": [
-                        {"name": "style0", "id": 9},
+                        {"name": "style0", "id": 9, "type": "sing"},
                     ],
                     "speaker_uuid": "b1a81618-b27b-40d2-b0ea-27a9ad408c4b",
                     "version": "mock",
@@ -67,6 +71,8 @@ def yukarin_s_forward(
         self, length: int, phoneme_list: NDArray[np.int64], style_id: NDArray[np.int64]
     ) -> NDArray[np.float32]:
         """音素系列サイズ・音素ID系列・スタイルIDから音素長系列を生成する"""
+        # TODO: トークスタイル以外はエラーにする
+
         result = []
         # mockとしての適当な処理、特に意味はない
         for i in range(length):
@@ -87,6 +93,8 @@ def yukarin_sa_forward(
         """モーラ系列サイズ・母音系列・子音系列・アクセント位置・アクセント句区切り・スタイルIDからモーラ音高系列を生成する"""
         assert length > 1, "前後無音を必ず付与しなければならない"
 
+        # TODO: トークスタイル以外はエラーにする
+
         result = []
         # mockとしての適当な処理、特に意味はない
         for i in range(length):
@@ -118,6 +126,8 @@ def decode_forward(
         style_id: NDArray[np.int64],
     ) -> NDArray[np.float32]:
         """フレーム長・音素種類数・フレーム音高・フレーム音素onehot・スタイルIDからダミー音声波形を生成する"""
+        # TODO: トークスタイル以外はエラーにする
+
         # 入力値を反映し、長さが 256 倍であるダミー配列を出力する
         result: list[NDArray[np.float32]] = []
         for i in range(length):

From ef122413041aa8896ccc218a3de5f7ccdb58dd3a Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Tue, 6 Feb 2024 22:18:56 +0900
Subject: [PATCH 169/177] =?UTF-8?q?=E3=82=BD=E3=83=B3=E3=82=B0=E3=81=AE?=
 =?UTF-8?q?=E3=83=A2=E3=83=83=E3=82=AF=E3=82=92=E8=BF=BD=E5=8A=A0=20(#1049?=
 =?UTF-8?q?)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* 型はSnapshotAssertionだった

* スナップショットjsonをlinguist-generated=trueに

* モックを追加

* モックが動かなかったので修正、テストも追加

* 流石に１３万行は･･･

* もう少し短くする

* pysen

* 少しコメント調整
---
 ...thesize_wave_from_score_output[query].json |  268 ++
 ...nthesize_wave_from_score_output[wave].json | 3995 +++++++++++++++++
 test/tts_pipeline/test_tts_engine.py          |   59 +-
 voicevox_engine/core/core_wrapper.py          |   18 +-
 voicevox_engine/dev/core/mock.py              |   92 +
 voicevox_engine/tts_pipeline/tts_engine.py    |    1 +
 6 files changed, 4424 insertions(+), 9 deletions(-)
 create mode 100644 test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_synthesize_wave_from_score_output[query].json
 create mode 100644 test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_synthesize_wave_from_score_output[wave].json

diff --git a/test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_synthesize_wave_from_score_output[query].json b/test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_synthesize_wave_from_score_output[query].json
new file mode 100644
index 000000000..ed97c822c
--- /dev/null
+++ b/test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_synthesize_wave_from_score_output[query].json
@@ -0,0 +1,268 @@
+[
+  [
+    {
+      "frame_length": 4,
+      "phoneme": "pau"
+    },
+    {
+      "frame_length": 6,
+      "phoneme": "d"
+    },
+    {
+      "frame_length": 4,
+      "phoneme": "o"
+    },
+    {
+      "frame_length": 8,
+      "phoneme": "r"
+    },
+    {
+      "frame_length": 13,
+      "phoneme": "e"
+    },
+    {
+      "frame_length": 4,
+      "phoneme": "m"
+    },
+    {
+      "frame_length": 21,
+      "phoneme": "i"
+    },
+    {
+      "frame_length": 3,
+      "phoneme": "pau"
+    },
+    {
+      "frame_length": 2,
+      "phoneme": "f"
+    },
+    {
+      "frame_length": 6,
+      "phoneme": "a"
+    },
+    {
+      "frame_length": 6,
+      "phoneme": "s"
+    },
+    {
+      "frame_length": 17,
+      "phoneme": "o"
+    },
+    {
+      "frame_length": 10,
+      "phoneme": "pau"
+    }
+  ],
+  [
+    0.0,
+    0.0,
+    0.0,
+    0.0,
+    262.93,
+    262.93,
+    262.93,
+    262.93,
+    262.93,
+    262.93,
+    264.0,
+    264.0,
+    264.0,
+    264.0,
+    296.53,
+    296.53,
+    296.53,
+    296.53,
+    296.53,
+    296.53,
+    296.53,
+    296.53,
+    295.27,
+    295.27,
+    295.27,
+    295.27,
+    295.27,
+    295.27,
+    295.27,
+    295.27,
+    295.27,
+    295.27,
+    295.27,
+    295.27,
+    295.27,
+    332.32,
+    332.32,
+    332.32,
+    332.32,
+    331.95,
+    331.95,
+    331.95,
+    331.95,
+    331.95,
+    331.95,
+    331.95,
+    331.95,
+    331.95,
+    331.95,
+    331.95,
+    331.95,
+    331.95,
+    331.95,
+    331.95,
+    331.95,
+    331.95,
+    331.95,
+    331.95,
+    331.95,
+    331.95,
+    0.0,
+    0.0,
+    0.0,
+    351.21,
+    351.21,
+    350.58,
+    350.58,
+    350.58,
+    350.58,
+    350.58,
+    350.58,
+    396.0,
+    396.0,
+    396.0,
+    396.0,
+    396.0,
+    396.0,
+    395.56,
+    395.56,
+    395.56,
+    395.56,
+    395.56,
+    395.56,
+    395.56,
+    395.56,
+    395.56,
+    395.56,
+    395.56,
+    395.56,
+    395.56,
+    395.56,
+    395.56,
+    395.56,
+    395.56,
+    0.0,
+    0.0,
+    0.0,
+    0.0,
+    0.0,
+    0.0,
+    0.0,
+    0.0,
+    0.0,
+    0.0
+  ],
+  [
+    0.0,
+    0.0,
+    0.0,
+    0.0,
+    0.61,
+    0.61,
+    0.61,
+    0.61,
+    0.61,
+    0.61,
+    1.53,
+    1.53,
+    1.53,
+    1.53,
+    1.96,
+    1.96,
+    1.96,
+    1.96,
+    1.96,
+    1.96,
+    1.96,
+    1.96,
+    0.83,
+    0.83,
+    0.83,
+    0.83,
+    0.83,
+    0.83,
+    0.83,
+    0.83,
+    0.83,
+    0.83,
+    0.83,
+    0.83,
+    0.83,
+    1.79,
+    1.79,
+    1.79,
+    1.79,
+    1.44,
+    1.44,
+    1.44,
+    1.44,
+    1.44,
+    1.44,
+    1.44,
+    1.44,
+    1.44,
+    1.44,
+    1.44,
+    1.44,
+    1.44,
+    1.44,
+    1.44,
+    1.44,
+    1.44,
+    1.44,
+    1.44,
+    1.44,
+    1.44,
+    0.0,
+    0.0,
+    0.0,
+    1.11,
+    1.11,
+    0.51,
+    0.51,
+    0.51,
+    0.51,
+    0.51,
+    0.51,
+    3.0,
+    3.0,
+    3.0,
+    3.0,
+    3.0,
+    3.0,
+    2.57,
+    2.57,
+    2.57,
+    2.57,
+    2.57,
+    2.57,
+    2.57,
+    2.57,
+    2.57,
+    2.57,
+    2.57,
+    2.57,
+    2.57,
+    2.57,
+    2.57,
+    2.57,
+    2.57,
+    0.0,
+    0.0,
+    0.0,
+    0.0,
+    0.0,
+    0.0,
+    0.0,
+    0.0,
+    0.0,
+    0.0
+  ]
+]
diff --git a/test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_synthesize_wave_from_score_output[wave].json b/test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_synthesize_wave_from_score_output[wave].json
new file mode 100644
index 000000000..3b711372a
--- /dev/null
+++ b/test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_synthesize_wave_from_score_output[wave].json
@@ -0,0 +1,3995 @@
+[
+  [
+    0.7
+  ],
+  [
+    1.41
+  ],
+  [
+    1.24
+  ],
+  [
+    1.34
+  ],
+  [
+    1.27
+  ],
+  [
+    1.32
+  ],
+  [
+    1.28
+  ],
+  [
+    1.31
+  ],
+  [
+    1.29
+  ],
+  [
+    1.31
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.29
+  ],
+  [
+    1.31
+  ],
+  [
+    1.29
+  ],
+  [
+    1.35
+  ],
+  [
+    1.45
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.45
+  ],
+  [
+    1.44
+  ],
+  [
+    1.45
+  ],
+  [
+    1.44
+  ],
+  [
+    1.45
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.44
+  ],
+  [
+    1.45
+  ],
+  [
+    1.44
+  ],
+  [
+    1.45
+  ],
+  [
+    1.43
+  ],
+  [
+    1.46
+  ],
+  [
+    1.42
+  ],
+  [
+    1.48
+  ],
+  [
+    1.38
+  ],
+  [
+    1.84
+  ],
+  [
+    2.27
+  ],
+  [
+    2.16
+  ],
+  [
+    2.22
+  ],
+  [
+    2.18
+  ],
+  [
+    2.21
+  ],
+  [
+    2.19
+  ],
+  [
+    2.2
+  ],
+  [
+    2.19
+  ],
+  [
+    2.2
+  ],
+  [
+    2.2
+  ],
+  [
+    2.2
+  ],
+  [
+    2.2
+  ],
+  [
+    2.2
+  ],
+  [
+    2.2
+  ],
+  [
+    2.2
+  ],
+  [
+    2.2
+  ],
+  [
+    2.19
+  ],
+  [
+    2.2
+  ],
+  [
+    2.19
+  ],
+  [
+    2.2
+  ],
+  [
+    2.19
+  ],
+  [
+    2.2
+  ],
+  [
+    2.19
+  ],
+  [
+    2.2
+  ],
+  [
+    2.19
+  ],
+  [
+    2.2
+  ],
+  [
+    2.19
+  ],
+  [
+    2.2
+  ],
+  [
+    2.19
+  ],
+  [
+    2.2
+  ],
+  [
+    2.2
+  ],
+  [
+    2.2
+  ],
+  [
+    2.2
+  ],
+  [
+    2.2
+  ],
+  [
+    2.2
+  ],
+  [
+    2.2
+  ],
+  [
+    2.2
+  ],
+  [
+    2.2
+  ],
+  [
+    2.2
+  ],
+  [
+    2.19
+  ],
+  [
+    2.2
+  ],
+  [
+    2.19
+  ],
+  [
+    2.2
+  ],
+  [
+    2.19
+  ],
+  [
+    2.21
+  ],
+  [
+    2.19
+  ],
+  [
+    2.21
+  ],
+  [
+    2.18
+  ],
+  [
+    2.22
+  ],
+  [
+    2.15
+  ],
+  [
+    2.37
+  ],
+  [
+    2.75
+  ],
+  [
+    2.7
+  ],
+  [
+    2.73
+  ],
+  [
+    2.71
+  ],
+  [
+    2.72
+  ],
+  [
+    2.71
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.71
+  ],
+  [
+    2.72
+  ],
+  [
+    2.71
+  ],
+  [
+    2.72
+  ],
+  [
+    2.71
+  ],
+  [
+    2.72
+  ],
+  [
+    2.71
+  ],
+  [
+    2.72
+  ],
+  [
+    2.71
+  ],
+  [
+    2.72
+  ],
+  [
+    2.71
+  ],
+  [
+    2.72
+  ],
+  [
+    2.71
+  ],
+  [
+    2.72
+  ],
+  [
+    2.71
+  ],
+  [
+    2.72
+  ],
+  [
+    2.71
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.71
+  ],
+  [
+    2.72
+  ],
+  [
+    2.71
+  ],
+  [
+    2.72
+  ],
+  [
+    2.71
+  ],
+  [
+    2.72
+  ],
+  [
+    2.71
+  ],
+  [
+    2.72
+  ],
+  [
+    2.71
+  ],
+  [
+    2.72
+  ],
+  [
+    2.71
+  ],
+  [
+    2.72
+  ],
+  [
+    2.71
+  ],
+  [
+    2.72
+  ],
+  [
+    2.71
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.72
+  ],
+  [
+    2.71
+  ],
+  [
+    2.72
+  ],
+  [
+    2.71
+  ],
+  [
+    2.72
+  ],
+  [
+    2.71
+  ],
+  [
+    2.72
+  ],
+  [
+    2.71
+  ],
+  [
+    2.72
+  ],
+  [
+    2.71
+  ],
+  [
+    2.72
+  ],
+  [
+    2.71
+  ],
+  [
+    2.73
+  ],
+  [
+    2.71
+  ],
+  [
+    2.73
+  ],
+  [
+    2.7
+  ],
+  [
+    2.73
+  ],
+  [
+    2.7
+  ],
+  [
+    2.73
+  ],
+  [
+    2.7
+  ],
+  [
+    2.73
+  ],
+  [
+    2.7
+  ],
+  [
+    2.73
+  ],
+  [
+    2.68
+  ],
+  [
+    1.7
+  ],
+  [
+    1.5
+  ],
+  [
+    1.58
+  ],
+  [
+    1.53
+  ],
+  [
+    1.57
+  ],
+  [
+    1.53
+  ],
+  [
+    1.57
+  ],
+  [
+    1.54
+  ],
+  [
+    1.57
+  ],
+  [
+    1.54
+  ],
+  [
+    1.56
+  ],
+  [
+    1.54
+  ],
+  [
+    1.56
+  ],
+  [
+    1.55
+  ],
+  [
+    1.56
+  ],
+  [
+    1.55
+  ],
+  [
+    1.56
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.56
+  ],
+  [
+    1.55
+  ],
+  [
+    1.56
+  ],
+  [
+    1.55
+  ],
+  [
+    1.56
+  ],
+  [
+    1.55
+  ],
+  [
+    1.56
+  ],
+  [
+    1.55
+  ],
+  [
+    1.56
+  ],
+  [
+    1.55
+  ],
+  [
+    1.56
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.55
+  ],
+  [
+    1.56
+  ],
+  [
+    1.55
+  ],
+  [
+    1.56
+  ],
+  [
+    1.54
+  ],
+  [
+    1.57
+  ],
+  [
+    1.53
+  ],
+  [
+    1.59
+  ],
+  [
+    1.48
+  ],
+  [
+    2.02
+  ],
+  [
+    2.52
+  ],
+  [
+    2.4
+  ],
+  [
+    2.47
+  ],
+  [
+    2.42
+  ],
+  [
+    2.45
+  ],
+  [
+    2.43
+  ],
+  [
+    2.45
+  ],
+  [
+    2.43
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.44
+  ],
+  [
+    2.43
+  ],
+  [
+    2.45
+  ],
+  [
+    2.43
+  ],
+  [
+    2.45
+  ],
+  [
+    2.42
+  ],
+  [
+    2.47
+  ],
+  [
+    2.31
+  ],
+  [
+    2.02
+  ],
+  [
+    2.05
+  ],
+  [
+    2.03
+  ],
+  [
+    2.05
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.04
+  ],
+  [
+    2.05
+  ],
+  [
+    2.03
+  ],
+  [
+    2.05
+  ],
+  [
+    2.03
+  ],
+  [
+    2.06
+  ],
+  [
+    2.01
+  ],
+  [
+    2.11
+  ],
+  [
+    1.65
+  ],
+  [
+    1.23
+  ],
+  [
+    1.33
+  ],
+  [
+    1.28
+  ],
+  [
+    1.32
+  ],
+  [
+    1.29
+  ],
+  [
+    1.31
+  ],
+  [
+    1.29
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.29
+  ],
+  [
+    1.31
+  ],
+  [
+    1.29
+  ],
+  [
+    1.31
+  ],
+  [
+    1.29
+  ],
+  [
+    1.31
+  ],
+  [
+    1.28
+  ],
+  [
+    1.37
+  ],
+  [
+    1.73
+  ],
+  [
+    1.73
+  ],
+  [
+    1.73
+  ],
+  [
+    1.73
+  ],
+  [
+    1.73
+  ],
+  [
+    1.74
+  ],
+  [
+    1.72
+  ],
+  [
+    1.74
+  ],
+  [
+    1.72
+  ],
+  [
+    1.74
+  ],
+  [
+    1.73
+  ],
+  [
+    1.73
+  ],
+  [
+    1.73
+  ],
+  [
+    1.73
+  ],
+  [
+    1.73
+  ],
+  [
+    1.73
+  ],
+  [
+    1.73
+  ],
+  [
+    1.73
+  ],
+  [
+    1.73
+  ],
+  [
+    1.73
+  ],
+  [
+    1.73
+  ],
+  [
+    1.72
+  ],
+  [
+    1.74
+  ],
+  [
+    1.72
+  ],
+  [
+    1.76
+  ],
+  [
+    1.55
+  ],
+  [
+    1.36
+  ],
+  [
+    1.41
+  ],
+  [
+    1.38
+  ],
+  [
+    1.4
+  ],
+  [
+    1.39
+  ],
+  [
+    1.4
+  ],
+  [
+    1.39
+  ],
+  [
+    1.39
+  ],
+  [
+    1.39
+  ],
+  [
+    1.39
+  ],
+  [
+    1.39
+  ],
+  [
+    1.39
+  ],
+  [
+    1.39
+  ],
+  [
+    1.39
+  ],
+  [
+    1.39
+  ],
+  [
+    1.39
+  ],
+  [
+    1.39
+  ],
+  [
+    1.39
+  ],
+  [
+    1.39
+  ],
+  [
+    1.39
+  ],
+  [
+    1.39
+  ],
+  [
+    1.39
+  ],
+  [
+    1.39
+  ],
+  [
+    1.39
+  ],
+  [
+    1.39
+  ],
+  [
+    1.39
+  ],
+  [
+    1.39
+  ],
+  [
+    1.39
+  ],
+  [
+    1.39
+  ],
+  [
+    1.39
+  ],
+  [
+    1.4
+  ],
+  [
+    1.39
+  ],
+  [
+    1.4
+  ],
+  [
+    1.39
+  ],
+  [
+    1.4
+  ],
+  [
+    1.39
+  ],
+  [
+    1.4
+  ],
+  [
+    1.39
+  ],
+  [
+    1.4
+  ],
+  [
+    1.39
+  ],
+  [
+    1.4
+  ],
+  [
+    1.39
+  ],
+  [
+    1.4
+  ],
+  [
+    1.39
+  ],
+  [
+    1.39
+  ],
+  [
+    1.39
+  ],
+  [
+    1.39
+  ],
+  [
+    1.39
+  ],
+  [
+    1.39
+  ],
+  [
+    1.4
+  ],
+  [
+    1.39
+  ],
+  [
+    1.4
+  ],
+  [
+    1.39
+  ],
+  [
+    1.4
+  ],
+  [
+    1.38
+  ],
+  [
+    1.4
+  ],
+  [
+    1.38
+  ],
+  [
+    1.41
+  ],
+  [
+    1.38
+  ],
+  [
+    1.41
+  ],
+  [
+    1.38
+  ],
+  [
+    1.41
+  ],
+  [
+    1.38
+  ],
+  [
+    1.41
+  ],
+  [
+    1.38
+  ],
+  [
+    1.41
+  ],
+  [
+    1.38
+  ],
+  [
+    1.41
+  ],
+  [
+    1.38
+  ],
+  [
+    1.4
+  ],
+  [
+    1.39
+  ],
+  [
+    1.38
+  ],
+  [
+    1.41
+  ],
+  [
+    1.35
+  ],
+  [
+    1.47
+  ],
+  [
+    1.24
+  ],
+  [
+    3.51
+  ],
+  [
+    4.6
+  ],
+  [
+    4.24
+  ],
+  [
+    4.46
+  ],
+  [
+    4.3
+  ],
+  [
+    4.43
+  ],
+  [
+    4.32
+  ],
+  [
+    4.41
+  ],
+  [
+    4.34
+  ],
+  [
+    4.4
+  ],
+  [
+    4.35
+  ],
+  [
+    4.39
+  ],
+  [
+    4.36
+  ],
+  [
+    4.38
+  ],
+  [
+    4.36
+  ],
+  [
+    4.37
+  ],
+  [
+    4.37
+  ],
+  [
+    4.37
+  ],
+  [
+    4.37
+  ],
+  [
+    4.36
+  ],
+  [
+    4.37
+  ],
+  [
+    4.36
+  ],
+  [
+    4.38
+  ],
+  [
+    4.36
+  ],
+  [
+    4.38
+  ],
+  [
+    4.36
+  ],
+  [
+    4.38
+  ],
+  [
+    4.36
+  ],
+  [
+    4.38
+  ],
+  [
+    4.36
+  ],
+  [
+    4.38
+  ],
+  [
+    4.36
+  ],
+  [
+    4.37
+  ],
+  [
+    4.36
+  ],
+  [
+    4.37
+  ],
+  [
+    4.37
+  ],
+  [
+    4.37
+  ],
+  [
+    4.37
+  ],
+  [
+    4.37
+  ],
+  [
+    4.37
+  ],
+  [
+    4.37
+  ],
+  [
+    4.37
+  ],
+  [
+    4.37
+  ],
+  [
+    4.37
+  ],
+  [
+    4.37
+  ],
+  [
+    4.37
+  ],
+  [
+    4.37
+  ],
+  [
+    4.37
+  ],
+  [
+    4.37
+  ],
+  [
+    4.37
+  ],
+  [
+    4.37
+  ],
+  [
+    4.37
+  ],
+  [
+    4.37
+  ],
+  [
+    4.37
+  ],
+  [
+    4.37
+  ],
+  [
+    4.37
+  ],
+  [
+    4.37
+  ],
+  [
+    4.37
+  ],
+  [
+    4.37
+  ],
+  [
+    4.37
+  ],
+  [
+    4.36
+  ],
+  [
+    4.37
+  ],
+  [
+    4.36
+  ],
+  [
+    4.37
+  ],
+  [
+    4.36
+  ],
+  [
+    4.38
+  ],
+  [
+    4.36
+  ],
+  [
+    4.38
+  ],
+  [
+    4.36
+  ],
+  [
+    4.38
+  ],
+  [
+    4.36
+  ],
+  [
+    4.38
+  ],
+  [
+    4.36
+  ],
+  [
+    4.38
+  ],
+  [
+    4.36
+  ],
+  [
+    4.38
+  ],
+  [
+    4.34
+  ],
+  [
+    3.65
+  ],
+  [
+    3.52
+  ],
+  [
+    3.57
+  ],
+  [
+    3.53
+  ],
+  [
+    3.56
+  ],
+  [
+    3.54
+  ],
+  [
+    3.56
+  ],
+  [
+    3.54
+  ],
+  [
+    3.56
+  ],
+  [
+    3.54
+  ],
+  [
+    3.56
+  ],
+  [
+    3.54
+  ],
+  [
+    3.56
+  ],
+  [
+    3.54
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.56
+  ],
+  [
+    3.54
+  ],
+  [
+    3.56
+  ],
+  [
+    3.54
+  ],
+  [
+    3.56
+  ],
+  [
+    3.54
+  ],
+  [
+    3.56
+  ],
+  [
+    3.54
+  ],
+  [
+    3.56
+  ],
+  [
+    3.54
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.55
+  ],
+  [
+    3.54
+  ],
+  [
+    3.56
+  ],
+  [
+    3.54
+  ],
+  [
+    3.57
+  ],
+  [
+    3.53
+  ],
+  [
+    3.58
+  ],
+  [
+    3.51
+  ],
+  [
+    3.59
+  ],
+  [
+    3.49
+  ],
+  [
+    3.62
+  ],
+  [
+    3.44
+  ],
+  [
+    3.74
+  ],
+  [
+    2.8
+  ],
+  [
+    1.15
+  ],
+  [
+    1.37
+  ],
+  [
+    1.26
+  ],
+  [
+    1.32
+  ],
+  [
+    1.29
+  ],
+  [
+    1.31
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.31
+  ],
+  [
+    1.29
+  ],
+  [
+    1.31
+  ],
+  [
+    1.29
+  ],
+  [
+    1.31
+  ],
+  [
+    1.29
+  ],
+  [
+    1.31
+  ],
+  [
+    1.29
+  ],
+  [
+    1.31
+  ],
+  [
+    1.29
+  ],
+  [
+    1.31
+  ],
+  [
+    1.29
+  ],
+  [
+    1.31
+  ],
+  [
+    1.29
+  ],
+  [
+    1.31
+  ],
+  [
+    1.29
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.3
+  ],
+  [
+    1.31
+  ],
+  [
+    1.29
+  ],
+  [
+    1.31
+  ],
+  [
+    1.29
+  ],
+  [
+    1.32
+  ],
+  [
+    1.28
+  ],
+  [
+    1.33
+  ],
+  [
+    1.27
+  ],
+  [
+    1.34
+  ],
+  [
+    1.24
+  ],
+  [
+    1.41
+  ]
+]
diff --git a/test/tts_pipeline/test_tts_engine.py b/test/tts_pipeline/test_tts_engine.py
index 047140e17..8d4c322b6 100644
--- a/test/tts_pipeline/test_tts_engine.py
+++ b/test/tts_pipeline/test_tts_engine.py
@@ -9,7 +9,14 @@
 
 from voicevox_engine.dev.core.mock import MockCoreWrapper
 from voicevox_engine.metas.Metas import StyleId
-from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
+from voicevox_engine.model import (
+    AccentPhrase,
+    AudioQuery,
+    FrameAudioQuery,
+    Mora,
+    Note,
+    Score,
+)
 from voicevox_engine.tts_pipeline.text_analyzer import text_to_accent_phrases
 from voicevox_engine.tts_pipeline.tts_engine import (
     TTSEngine,
@@ -182,6 +189,21 @@ def _gen_hello_hiho_query() -> AudioQuery:
     )
 
 
+def _gen_doremi_score() -> Score:  # test helper: "do re mi, fa so" score
+    return Score(
+        notes=[
+            Note(key=None, frame_length=10, lyric=""),  # presumably a rest
+            Note(key=60, frame_length=12, lyric="ど"),
+            Note(key=62, frame_length=17, lyric="れ"),
+            Note(key=64, frame_length=21, lyric="み"),
+            Note(key=None, frame_length=5, lyric=""),
+            Note(key=65, frame_length=12, lyric="ふぁ"),
+            Note(key=67, frame_length=17, lyric="そ"),
+            Note(key=None, frame_length=10, lyric=""),
+        ]
+    )
+
+
 class TestTTSEngine(TestCase):
     def setUp(self):
         super().setUp()
@@ -359,6 +381,41 @@ def test_mocked_synthesize_wave_output(snapshot_json: SnapshotAssertion) -> None
     assert snapshot_json == round_floats(result.tolist(), round_value=2)
 
 
+def test_mocked_synthesize_wave_from_score_output(
+    snapshot_json: SnapshotAssertion,
+) -> None:
+    """
+    Outputs of the mocked `TTSEngine.create_sing_phoneme_and_f0_and_volume()` and
+    `TTSEngine.frame_synthsize_wave()` must match their stored snapshots
+    """
+    # Inputs
+    tts_engine = TTSEngine(MockCoreWrapper())
+    doremi_score = _gen_doremi_score()
+    # Outputs
+    result = tts_engine.create_sing_phoneme_and_f0_and_volume(doremi_score, StyleId(1))
+    # Tests
+    assert snapshot_json(name="query") == round_floats(
+        pydantic_to_native_type(result), round_value=2
+    )
+
+    # Inputs
+    phonemes, f0, volume = result
+    doremi_query = FrameAudioQuery(
+        f0=f0,
+        volume=volume,
+        phonemes=phonemes,
+        volumeScale=1.3,
+        outputSamplingRate=1200,
+        outputStereo=False,
+    )
+    # Outputs
+    result_wave = tts_engine.frame_synthsize_wave(doremi_query, StyleId(1))
+    # Tests
+    assert snapshot_json(name="wave") == round_floats(
+        result_wave.tolist(), round_value=2
+    )
+
+
 def koreha_arimasuka_base_expected():
     return [
         AccentPhrase(
diff --git a/voicevox_engine/core/core_wrapper.py b/voicevox_engine/core/core_wrapper.py
index d8fb10f7f..5426c9750 100644
--- a/voicevox_engine/core/core_wrapper.py
+++ b/voicevox_engine/core/core_wrapper.py
@@ -763,17 +763,17 @@ def predict_sing_f0_forward(
         Parameters
         ----------
         length : int
-            音素列の長さ
+            フレームの長さ
         phoneme : NDArray[np.int64]
-            音素列
+            フレームごとの音素
         note : NDArray[np.int64]
-            ノート列
+            フレームごとのノート
         style_id : NDArray[np.int64]
             スタイル番号
         Returns
         -------
         output : NDArray[np.float32]
-            フレームごとのF0
+            フレームごとの音高
         """
         output = np.zeros((length,), dtype=np.float32)
         self.assert_core_success(
@@ -800,17 +800,19 @@ def predict_sing_volume_forward(
         Parameters
         ----------
         length : int
-            音素列の長さ
+            フレームの長さ
         phoneme : NDArray[np.int64]
-            音素列
+            フレームごとの音素
         note : NDArray[np.int64]
-            ノート列
+            フレームごとのノート
+        f0 : NDArray[np.float32]
+            フレームごとの音高
         style_id : NDArray[np.int64]
             スタイル番号
         Returns
         -------
         output : NDArray[np.float32]
-            フレームごとのF0
+            フレームごとの音量
         """
         output = np.zeros((length,), dtype=np.float32)
         self.assert_core_success(
diff --git a/voicevox_engine/dev/core/mock.py b/voicevox_engine/dev/core/mock.py
index 666c55440..25190ea55 100644
--- a/voicevox_engine/dev/core/mock.py
+++ b/voicevox_engine/dev/core/mock.py
@@ -136,6 +136,98 @@ def decode_forward(
             ] * 256
         return np.array(result, dtype=np.float32)
 
+    def predict_sing_consonant_length_forward(
+        self,
+        length: int,
+        consonant: NDArray[np.int64],
+        vowel: NDArray[np.int64],
+        note_duration: NDArray[np.int64],
+        style_id: NDArray[np.int64],
+    ) -> NDArray[np.int64]:
+        """Generate consonant lengths from vowels, consonants, note durations and style ID"""
+        result = []
+        # Arbitrary computation for the mock; it has no particular meaning
+        for i in range(length):
+            # When there is no consonant (-1), the length is 0
+            if consonant[0, i] == -1:
+                result.append(0)
+                continue
+
+            result.append(
+                (
+                    consonant[0, i] % 3
+                    + vowel[0, i] % 5
+                    + note_duration[0, i] % 7
+                    + style_id % 11
+                ).item()
+            )
+        return np.array(result, dtype=np.int64)
+
+    def predict_sing_f0_forward(
+        self,
+        length: int,
+        phoneme: NDArray[np.int64],
+        note: NDArray[np.int64],
+        style_id: NDArray[np.int64],
+    ) -> NDArray[np.float32]:
+        """Generate a pitch (f0) sequence from the phoneme sequence, note sequence and style ID"""
+        result = []
+        # Arbitrary mock computation, tuned to give frequencies roughly following the MIDI note
+        for i in range(length):
+            if note[0, i] == -1:
+                result.append(0)
+                continue
+            result.append(
+                (
+                    2 ** ((note[0, i] - 69) / 12)
+                    * (440 + phoneme[0, i] / 10 + style_id)
+                ).item()
+            )
+        return np.array(result, dtype=np.float32)
+
+    def predict_sing_volume_forward(
+        self,
+        length: int,
+        phoneme: NDArray[np.int64],
+        note: NDArray[np.int64],
+        f0: NDArray[np.float32],
+        style_id: NDArray[np.int64],
+    ) -> NDArray[np.float32]:
+        """Generate a volume sequence from the phoneme, note and pitch sequences and style ID"""
+        result = []
+        # Arbitrary mock computation, tuned to stay roughly within the 0-10 range
+        for i in range(length):
+            if note[0, i] == -1:
+                result.append(0)
+                continue
+            result.append(
+                (
+                    (phoneme[0, i] / 40)
+                    * (note[0, i] / 88)
+                    * (f0[0, i] / 440)
+                    * ((1 / 2) ** style_id)
+                    * 10
+                ).item()
+            )
+        return np.array(result, dtype=np.float32)
+
+    def sf_decode_forward(
+        self,
+        length: int,
+        phoneme: NDArray[np.int64],
+        f0: NDArray[np.float32],
+        volume: NDArray[np.float32],
+        style_id: NDArray[np.int64],
+    ) -> NDArray[np.float32]:
+        """Generate a dummy audio waveform from the inputs"""
+        # Output a dummy array that reflects the input values and is 256 times as long
+        result: list[NDArray[np.float32]] = []
+        for i in range(length):
+            result += [
+                (f0[0, i] / 440) * volume[0, i] * (phoneme[0, i] / 40) + style_id
+            ] * 256
+        return np.array(result, dtype=np.float32)
+
     def supported_devices(self):
         return json.dumps(
             {
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index c0c553516..17d06c3e4 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -261,6 +261,7 @@ def calc_phoneme_lengths(
             note_duration = note_durations[i]
 
             # もし、次のノートの子音長が負になる場合、現在のノートの半分にする
+            # NOTE: 将来的にコアは非負になるのでこの処理は不要になる
             if next_consonant_length < 0:
                 next_consonant_length = consonant_lengths[i + 1] = note_duration // 2
             vowel_length = note_duration - next_consonant_length

From 2700e55ef4fc52400bd6ed6d5e77638d2b20fcb7 Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Wed, 21 Feb 2024 13:22:24 +0900
Subject: [PATCH 170/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E5=B7=A8?=
 =?UTF-8?q?=E5=A4=A7=E3=81=AA=E3=83=90=E3=82=A4=E3=83=8A=E3=83=AA=E3=83=95?=
 =?UTF-8?q?=E3=82=A1=E3=82=A4=E3=83=AB=E3=81=8C=E3=81=82=E3=81=A3=E3=81=A6?=
 =?UTF-8?q?=E3=82=82snapshot=E3=81=8C=E8=89=AF=E3=81=84=E6=84=9F=E3=81=98?=
 =?UTF-8?q?=E3=81=AB=E5=8F=96=E3=82=8C=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB?=
 =?UTF-8?q?=20(#1050)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ...88f246b-8c41-4ac1-8e2d-5d79f3ff56d9].json" | 19 +++++++++++--
 ...ffcb7ce-00ec-4bdc-82cd-45a8889e43ff].json" | 19 +++++++++++--
 ...1a81618-b27b-40d2-b0ea-27a9ad408c4b].json" | 10 ++++++-
 ...5b2c544-660e-401e-b503-0e14c635303a].json" | 10 ++++++-
 ...88f246b-8c41-4ac1-8e2d-5d79f3ff56d9].json" | 19 +++++++++++--
 ...ffcb7ce-00ec-4bdc-82cd-45a8889e43ff].json" | 19 +++++++++++--
 test/e2e/test_speakers.py                     | 27 ++++++-------------
 test/utility.py                               | 17 ++++++++++++
 8 files changed, 111 insertions(+), 29 deletions(-)

diff --git "a/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[388f246b-8c41-4ac1-8e2d-5d79f3ff56d9].json" "b/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[388f246b-8c41-4ac1-8e2d-5d79f3ff56d9].json"
index 9c95c48d5..f2c119142 100644
--- "a/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[388f246b-8c41-4ac1-8e2d-5d79f3ff56d9].json"
+++ "b/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[388f246b-8c41-4ac1-8e2d-5d79f3ff56d9].json"
@@ -1,11 +1,26 @@
 {
   "policy": "dummy2 policy\n\nhttps://voicevox.hiroshiba.jp/\n",
+  "portrait": "MD5:72ceb00f20b2a1e449f0b45973cc8b24",
   "style_infos": [
     {
-      "id": 5
+      "icon": "MD5:5f2211c3144b8dee613056bef5893d60",
+      "id": 5,
+      "portrait": null,
+      "voice_samples": [
+        "MD5:2b7f17f6751b9f0c76950ad3bcc1a619",
+        "MD5:4bc9f14cda818955cba931b1532e18fd",
+        "MD5:9ebfc3cf3fba47513a60c464fc57c705"
+      ]
     },
     {
-      "id": 7
+      "icon": "MD5:375ecba26764b7c71ce61731b52f71f8",
+      "id": 7,
+      "portrait": null,
+      "voice_samples": [
+        "MD5:fc93b361293ce128afd8f48d4cd89bc5",
+        "MD5:b74ee50cb135ccf29c0a1be2711f8cca",
+        "MD5:08c325d1cfe72209949a77c327b60302"
+      ]
     }
   ]
 }
diff --git "a/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff].json" "b/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff].json"
index de6120e14..1923de700 100644
--- "a/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff].json"
+++ "b/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff].json"
@@ -1,11 +1,26 @@
 {
   "policy": "dummy1 policy\n\nhttps://voicevox.hiroshiba.jp/\n",
+  "portrait": "MD5:cab33c9fdf563682108666a012dc9853",
   "style_infos": [
     {
-      "id": 4
+      "icon": "MD5:9dfc8b32d4afc3c933388ba85a8d8d12",
+      "id": 4,
+      "portrait": "MD5:2aba7f7037d00903dada4401582bf31a",
+      "voice_samples": [
+        "MD5:49c763de77c5c6be4967900b08b561a9",
+        "MD5:d9736740e3735bbf45efd792f8af7383",
+        "MD5:58bda86215149663b605d0ba0db59bde"
+      ]
     },
     {
-      "id": 6
+      "icon": "MD5:53b0f8ce874e450fc8cc5758d6ed2b03",
+      "id": 6,
+      "portrait": "MD5:6a79f7e6d8ca9087be9a0e39eac67e7b",
+      "voice_samples": [
+        "MD5:e9fbdc80f22d91a1ad96612ea60391b4",
+        "MD5:8c403062ffc5fca5605aca46778ed512",
+        "MD5:95a626ec8a36a3a550d7ba4188937cb3"
+      ]
     }
   ]
 }
diff --git "a/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[b1a81618-b27b-40d2-b0ea-27a9ad408c4b].json" "b/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[b1a81618-b27b-40d2-b0ea-27a9ad408c4b].json"
index 73bb6af62..5c1b401fc 100644
--- "a/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[b1a81618-b27b-40d2-b0ea-27a9ad408c4b].json"
+++ "b/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[b1a81618-b27b-40d2-b0ea-27a9ad408c4b].json"
@@ -1,8 +1,16 @@
 {
   "policy": "dummy4 policy\n\nhttps://voicevox.hiroshiba.jp/\n",
+  "portrait": "MD5:70cb4a361935084f00f6956a4e8e4f32",
   "style_infos": [
     {
-      "id": 9
+      "icon": "MD5:e1e2fab676912fc0796a5b23320a0b67",
+      "id": 9,
+      "portrait": null,
+      "voice_samples": [
+        "MD5:fa1230e97dec17b814ec05da1709be19",
+        "MD5:714f4c4f2d3c51a1d9597a6960b8367c",
+        "MD5:bc47fd0d1ea9083c2f4621461ae072b8"
+      ]
     }
   ]
 }
diff --git "a/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[35b2c544-660e-401e-b503-0e14c635303a].json" "b/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[35b2c544-660e-401e-b503-0e14c635303a].json"
index 236cc8d5a..da10a2bf0 100644
--- "a/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[35b2c544-660e-401e-b503-0e14c635303a].json"
+++ "b/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[35b2c544-660e-401e-b503-0e14c635303a].json"
@@ -1,8 +1,16 @@
 {
   "policy": "dummy3 policy\n\nhttps://voicevox.hiroshiba.jp/\n",
+  "portrait": "MD5:90b250e0976f792e2fda2b1ad2643c7b",
   "style_infos": [
     {
-      "id": 8
+      "icon": "MD5:541aeccd87319c0af159cfa13baf26cb",
+      "id": 8,
+      "portrait": "MD5:1bb8b584e8499d601a3f3bf0c3216391",
+      "voice_samples": [
+        "MD5:148c72905d47a308cbdf9858c99ef9d7",
+        "MD5:46fda5f38dec0df94445066eee9ed128",
+        "MD5:61e7d2d3180c2c891cf096f50b98e317"
+      ]
     }
   ]
 }
diff --git "a/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[388f246b-8c41-4ac1-8e2d-5d79f3ff56d9].json" "b/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[388f246b-8c41-4ac1-8e2d-5d79f3ff56d9].json"
index e7f286678..b7dea7d76 100644
--- "a/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[388f246b-8c41-4ac1-8e2d-5d79f3ff56d9].json"
+++ "b/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[388f246b-8c41-4ac1-8e2d-5d79f3ff56d9].json"
@@ -1,11 +1,26 @@
 {
   "policy": "dummy2 policy\n\nhttps://voicevox.hiroshiba.jp/\n",
+  "portrait": "MD5:72ceb00f20b2a1e449f0b45973cc8b24",
   "style_infos": [
     {
-      "id": 1
+      "icon": "MD5:3248458ae11d28ec1eb482db7f1927d9",
+      "id": 1,
+      "portrait": null,
+      "voice_samples": [
+        "MD5:2cdd82264a8b0ad508ff3f5a84d5c920",
+        "MD5:14b4a96141c6b9e86ce4f38adaac1fcb",
+        "MD5:4494752eec42b718ff3b9a3fb934596a"
+      ]
     },
     {
-      "id": 3
+      "icon": "MD5:3e32a4a66bd2505cb75f91c8028d061c",
+      "id": 3,
+      "portrait": "MD5:1dd8a513f11c204c1449172b7a812be8",
+      "voice_samples": [
+        "MD5:2bd7d3be714fdfdda2e96aa98888a9bd",
+        "MD5:10a9d6d4bcd02a6fa37d13c3f7335df1",
+        "MD5:6a21d1007f8957fca45843fde1e2d1c2"
+      ]
     }
   ]
 }
diff --git "a/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff].json" "b/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff].json"
index b5566c22b..9d5d0588a 100644
--- "a/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff].json"
+++ "b/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff].json"
@@ -1,11 +1,26 @@
 {
   "policy": "dummy1 policy\n\nhttps://voicevox.hiroshiba.jp/\n",
+  "portrait": "MD5:cab33c9fdf563682108666a012dc9853",
   "style_infos": [
     {
-      "id": 0
+      "icon": "MD5:529b1750562ca339ba05c1b00f4b2854",
+      "id": 0,
+      "portrait": "MD5:6c1c461e54ba4f57d0c17171d17e1d80",
+      "voice_samples": [
+        "MD5:85acb767ac22b1d17915c666cc5cee90",
+        "MD5:ad9e64177d28f960fb9ce40162cd82c2",
+        "MD5:16bf0d95c463fff08353a3452bdb8d7c"
+      ]
     },
     {
-      "id": 2
+      "icon": "MD5:04cce28c375949935497ec3d5d015be9",
+      "id": 2,
+      "portrait": "MD5:0a4e78369adc266672d571f4c0663697",
+      "voice_samples": [
+        "MD5:f508aca632a8f1cfd9eee7ed29cff96c",
+        "MD5:09984e27d23eee8af13809a0f621f7fd",
+        "MD5:e3f9a4df3f537bfb9d63d1791eda73e6"
+      ]
     }
   ]
 }
diff --git a/test/e2e/test_speakers.py b/test/e2e/test_speakers.py
index 683d57203..598d41bbf 100644
--- a/test/e2e/test_speakers.py
+++ b/test/e2e/test_speakers.py
@@ -3,9 +3,10 @@
 TODO: 話者と歌手の両ドメイン共通のドメイン用語を定め、このテストファイル名を変更する。
 """
 
+from test.utility import hash_long_string
+
 from fastapi.testclient import TestClient
 from pydantic import parse_obj_as
-from syrupy import filters
 from syrupy.assertion import SnapshotAssertion
 
 from voicevox_engine.metas.Metas import Speaker
@@ -23,15 +24,9 @@ def test_話者の情報を取得できる(client: TestClient, snapshot_json: Sn
         response = client.get(
             "/speaker_info", params={"speaker_uuid": speaker.speaker_uuid}
         )
-        assert (
-            snapshot_json(
-                name=speaker.speaker_uuid,
-                exclude=filters.props(
-                    "portrait", "icon", "voice_samples"
-                ),  # バイナリファイル系は除外  FIXME: 除外せずにハッシュ化する
-            )
-            == response.json()
-        )
+        assert snapshot_json(
+            name=speaker.speaker_uuid,
+        ) == hash_long_string(response.json())
 
 
 def test_歌手一覧が取得できる(client: TestClient, snapshot_json: SnapshotAssertion) -> None:
@@ -46,12 +41,6 @@ def test_歌手の情報を取得できる(client: TestClient, snapshot_json: Sn
         response = client.get(
             "/singer_info", params={"speaker_uuid": singer.speaker_uuid}
         )
-        assert (
-            snapshot_json(
-                name=singer.speaker_uuid,
-                exclude=filters.props(
-                    "portrait", "icon", "voice_samples"
-                ),  # バイナリファイル系は除外  FIXME: 除外せずにハッシュ化する
-            )
-            == response.json()
-        )
+        assert snapshot_json(
+            name=singer.speaker_uuid,
+        ) == hash_long_string(response.json())
diff --git a/test/utility.py b/test/utility.py
index 8a8eafbb0..b11e8ded4 100644
--- a/test/utility.py
+++ b/test/utility.py
@@ -1,3 +1,4 @@
+import hashlib
 import json
 from typing import Any
 
@@ -19,3 +20,19 @@ def round_floats(value: Any, round_value: int) -> Any:
 def pydantic_to_native_type(value: Any) -> Any:
     """pydanticの型をnativeな型に変換する"""
     return json.loads(json.dumps(value, default=pydantic_encoder))
+
+
+def hash_long_string(value: Any) -> Any:
+    """Recursively replace strings longer than 1000 chars with `MD5:<hexdigest>`"""
+
+    def to_hash(value: str) -> str:
+        return "MD5:" + hashlib.md5(value.encode()).hexdigest()
+
+    if isinstance(value, str):
+        return value if len(value) <= 1000 else to_hash(value)
+    elif isinstance(value, list):
+        return [hash_long_string(v) for v in value]
+    elif isinstance(value, dict):
+        return {k: hash_long_string(v) for k, v in value.items()}
+    else:
+        return value  # any other type passes through unchanged

From 7707d3911fcf2f7c686a5d28120414dc301d79fb Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 21 Feb 2024 13:22:44 +0900
Subject: [PATCH 171/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20e2e=20`/setting`?=
 =?UTF-8?q?=20=E3=83=86=E3=82=B9=E3=83=88=E3=81=AE=E8=BF=BD=E5=8A=A0=20(#1?=
 =?UTF-8?q?057)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: e2e `/setting` テスト

* snapshotへ

---------

Co-authored-by: Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp>
---
 test/e2e/__snapshots__/test_setting.ambr | 373 +++++++++++++++++++++++
 test/e2e/test_setting.py                 |  12 +
 2 files changed, 385 insertions(+)
 create mode 100644 test/e2e/__snapshots__/test_setting.ambr
 create mode 100644 test/e2e/test_setting.py

diff --git a/test/e2e/__snapshots__/test_setting.ambr b/test/e2e/__snapshots__/test_setting.ambr
new file mode 100644
index 000000000..97c020182
--- /dev/null
+++ b/test/e2e/__snapshots__/test_setting.ambr
@@ -0,0 +1,373 @@
+# serializer version: 1
+# name: test_setting画面が取得できる
+  '''
+  <!DOCTYPE html>
+  
+  <!-- 
+    VOICEVOXエンジンの設定ページです。
+    VueとBootstrapを使っています。
+    ライブラリを読み込んだあと、Vueコンポーネントの初期化が完了してからUIを表示します。
+  -->
+  
+  <html lang="ja">
+    <head>
+      <meta charset="utf-8" />
+      <title>VOICEVOX Engine 設定</title>
+      <link
+        rel="shortcut icon"
+        href="https://voicevox.hiroshiba.jp/favicon-32x32.png"
+      />
+  
+      <style>
+        .before-init-fadein {
+          animation: fadein 0.5s;
+        }
+  
+        /* 指定時間の最後に現れるフェードイン */
+        @keyframes fadein {
+          0% {
+            opacity: 0;
+          }
+          95% {
+            opacity: 0;
+          }
+          100% {
+            opacity: 1;
+          }
+        }
+      </style>
+    </head>
+  
+    <body>
+      <!-- Vueの準備が完了した後にdisplay: noneにする -->
+      <div id="before-init" style="display: block" class="before-init-fadein">
+        <p>読み込み中です。表示には数秒かかることがあります。</p>
+      </div>
+  
+      <!-- Vueの準備が完了した後にdisplay: blockにする -->
+      <div id="app" class="container p-3" style="display: none">
+        <h1 class="mb-3">{{brandName}} エンジン 設定</h1>
+  
+        <div class="alert alert-warning" role="alert">
+          変更を反映するにはエンジンの再起動が必要です。
+        </div>
+  
+        <div class="mb-3">
+          <label class="form-label">CORS Policy Mode</label>
+          <select
+            class="form-select"
+            aria-label="corsPolicyMode"
+            v-model="corsPolicyMode"
+          >
+            <option value="localapps">localapps</option>
+            <option value="all">all</option>
+          </select>
+          <div class="form-text">
+            <p class="mb-1">
+              localappsはオリジン間リソース共有ポリシーを、app://.とlocalhost関連に限定します。
+            </p>
+            <p class="mb-1">
+              その他のオリジンはAllow Originオプションで追加できます。
+            </p>
+            <p>allはすべてを許可します。危険性を理解した上でご利用ください。</p>
+          </div>
+        </div>
+  
+        <div class="mb-3">
+          <label class="form-label">Allow Origin</label>
+          <input
+            class="form-control"
+            type="text"
+            v-model.trim.lazy="allowOrigin"
+          />
+          <div class="form-text">
+            許可するオリジンを指定します。スペースで区切ることで複数指定できます。
+          </div>
+        </div>
+  
+        <div class="mb-3">
+          <label class="form-label">ユーザー辞書のインポート</label>
+          <div class="col-12">
+            <button
+              type="button"
+              class="btn btn-primary"
+              data-bs-toggle="modal"
+              data-bs-target="#importUserDictModal"
+            >
+              インポート
+            </button>
+          </div>
+        </div>
+  
+        <div class="mb-3">
+          <label class="form-label">ユーザー辞書のエクスポート</label>
+          <div class="col-12">
+            <a
+              download="VOICEVOXユーザー辞書.json"
+              class="btn btn-primary mb-3"
+              href="/user_dict"
+              @click="showToastWithMessage('辞書をエクスポートしました。');"
+              target="_blank"
+              rel="noopener noreferrer"
+            >
+              エクスポート
+            </a>
+          </div>
+        </div>
+  
+        <!-- ユーザー辞書インポート用モーダル -->
+        <div
+          class="modal fade"
+          id="importUserDictModal"
+          tabindex="-1"
+          aria-labelledby="importUserDictModalLabel"
+          aria-hidden="true"
+        >
+          <div class="modal-dialog">
+            <div class="modal-content">
+              <div class="modal-header">
+                <h5 class="modal-title" id="importUserDictModalLabel">
+                  ユーザー辞書のインポート
+                </h5>
+                <button
+                  type="button"
+                  class="btn-close"
+                  data-bs-dismiss="modal"
+                  aria-label="Close"
+                ></button>
+              </div>
+              <div class="modal-body">
+                <input
+                  class="form-control"
+                  type="file"
+                  accept="application/json"
+                  @change="(e) => { userDictFileForImport = e.target.files[0]; }"
+                />
+              </div>
+              <div class="modal-footer">
+                <button
+                  type="button"
+                  class="btn btn-secondary"
+                  data-bs-dismiss="modal"
+                >
+                  キャンセル
+                </button>
+                <button
+                  type="button"
+                  @click="importUserDict"
+                  class="btn btn-primary"
+                  data-bs-dismiss="modal"
+                  :disabled="userDictFileForImport == undefined"
+                >
+                  インポート
+                </button>
+              </div>
+            </div>
+          </div>
+        </div>
+  
+        <!-- トースト -->
+        <div class="position-fixed bottom-0 end-0 p-3" style="z-index: 5">
+          <div
+            class="toast align-items-center autohide text-white bg-success"
+            role="alert"
+            aria-live="assertive"
+            aria-atomic="true"
+            ref="toastElem"
+          >
+            <div class="d-flex">
+              <div class="toast-body">{{toastMessage}}</div>
+            </div>
+          </div>
+        </div>
+      </div>
+  
+      <script>
+        // Vueの初期化
+        function initVue() {
+          const { createApp, ref, watch, onMounted } = Vue;
+          createApp({
+            setup() {
+              // 設定値周り
+              const corsPolicyMode = ref(
+                "localapps"
+              );
+              const allowOrigin = ref("");
+  
+              // 設定が変更されたら自動保存
+              watch([corsPolicyMode, allowOrigin], () => {
+                const formData = new FormData();
+                formData.append("cors_policy_mode", corsPolicyMode.value);
+                formData.append("allow_origin", allowOrigin.value);
+  
+                fetch("/setting", {
+                  method: "POST",
+                  mode: "same-origin",
+                  body: formData,
+                }).then((res) => {
+                  if (res.ok) {
+                    showToastWithMessage("設定を保存しました。");
+                  } else {
+                    showToastWithMessage("設定の保存に失敗しました。");
+                  }
+                });
+              });
+  
+              // ユーザー辞書周り
+              const userDictFileForImport = ref();
+  
+              const importUserDict = () => {
+                if (userDictFileForImport.value == undefined) {
+                  throw new Error("userDictFileForImportが見つかりません。");
+                }
+  
+                const reader = new FileReader();
+                reader.addEventListener("load", async () => {
+                  const params = new URLSearchParams({
+                    override: true, // 重複するエントリを上書きする
+                  });
+                  await fetch(`/import_user_dict?${params}`, {
+                    method: "POST",
+                    mode: "same-origin",
+                    headers: { "Content-Type": "application/json" },
+                    body: reader.result,
+                  });
+  
+                  showToastWithMessage("辞書をインポートしました。");
+                });
+  
+                reader.readAsText(userDictFileForImport.value);
+              };
+  
+              // トースト
+              const toastElem = ref(undefined);
+              const bootstrapToast = ref(undefined);
+              const toastMessage = ref("");
+              onMounted(() => {
+                if (toastElem.value == undefined) {
+                  throw new Error("toastElemが見つかりません。");
+                }
+                bootstrapToast.value = new bootstrap.Toast(toastElem.value);
+              });
+              const showToastWithMessage = (message) => {
+                console.log(`showToastWithMessage: ${message}`);
+                bootstrapToast.value.show();
+                toastMessage.value = message;
+              };
+  
+              // 表示用の情報
+              const brandName = ref("DUMMY");
+  
+              // Vueの準備が完了したら表示・非表示を切り替える
+              onMounted(() => {
+                document.getElementById("before-init").style.display = "none";
+                document.getElementById("app").style.display = "block";
+              });
+  
+              return {
+                corsPolicyMode,
+                allowOrigin,
+                userDictFileForImport,
+                importUserDict,
+                toastElem,
+                toastMessage,
+                showToastWithMessage,
+                brandName,
+              };
+            },
+          }).mount("#app");
+        }
+  
+        /**
+         * CDNからscriptやCSSを読み込む。
+         * CDNが使えないときのために複数の候補を試す。
+         */
+        const loadCDN = async (scriptOrCss, candidateUrlList, integrity) => {
+          if (scriptOrCss !== "script" && scriptOrCss !== "css") {
+            throw new Error("scriptOrCssはscriptかcssを指定してください。");
+          }
+  
+          let current = 0;
+          await new Promise((resolve, reject) => {
+            const loadNext = async () => {
+              if (current >= candidateUrlList.length) {
+                reject(new Error("全てのCDNで読み込みに失敗しました。"));
+                return;
+              }
+  
+              let elem;
+              if (scriptOrCss === "script") {
+                elem = document.createElement("script");
+                elem.src = candidateUrlList[current];
+              } else {
+                elem = document.createElement("link");
+                elem.href = candidateUrlList[current];
+                elem.rel = "stylesheet";
+              }
+              elem.integrity = integrity;
+              elem.crossOrigin = "anonymous";
+              elem.onload = resolve;
+              elem.onerror = () => {
+                console.warn(
+                  `CDNの読み込みに失敗しました。 ${candidateUrlList[current]}`
+                );
+                document.head.removeChild(elem);
+                current++;
+                loadNext();
+              };
+              document.head.appendChild(elem);
+            };
+            loadNext();
+          });
+        };
+  
+        // 初期化用の関数
+        const init = async () => {
+          // ライブラリ読み込み用のPromiseリスト
+          const libraryLoadingPromises = [];
+  
+          // Bootstrapを読み込む
+          const bootstrapCssPromise = loadCDN(
+            "css",
+            [
+              "https://unpkg.com/bootstrap@5.0.2/dist/css/bootstrap.min.css",
+              "https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css",
+              "https://cdnjs.cloudflare.com/ajax/libs/bootstrap/5.0.2/css/bootstrap.min.css",
+            ],
+            "sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC"
+          );
+          libraryLoadingPromises.push(bootstrapCssPromise);
+  
+          const bootstrapScriptPromise = loadCDN(
+            "script",
+            [
+              "https://unpkg.com/bootstrap@5.0.2/dist/js/bootstrap.bundle.min.js",
+              "https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/js/bootstrap.bundle.min.js",
+              "https://cdnjs.cloudflare.com/ajax/libs/bootstrap/5.0.2/js/bootstrap.bundle.min.js",
+            ],
+            "sha384-MrcW6ZMFYlzcLA8Nl+NtUVF0sA7MsXsP1UyJoMp4YLEuNSfAP+JcXn/tWtIaxVXM"
+          );
+          libraryLoadingPromises.push(bootstrapScriptPromise);
+  
+          // Vueを読み込む
+          const vuePromise = loadCDN(
+            "script",
+            [
+              "https://unpkg.com/vue@3.3.10/dist/vue.global.js",
+              "https://cdn.jsdelivr.net/npm/vue@3.3.10/dist/vue.global.js",
+              "https://cdnjs.cloudflare.com/ajax/libs/vue/3.3.10/vue.global.js",
+            ],
+            "sha384-ttfhgYK68lNlS8ak6Z//mvUbpRbRCh43MYGuqEtK8mj/yzlKqY8GA8o3BPMi23cE"
+          );
+          libraryLoadingPromises.push(vuePromise);
+  
+          // ライブラリの読み込みが完了したらVueを初期化
+          await Promise.all(libraryLoadingPromises);
+          initVue();
+        };
+        init();
+      </script>
+    </body>
+  </html>
+  '''
+# ---
diff --git a/test/e2e/test_setting.py b/test/e2e/test_setting.py
new file mode 100644
index 000000000..4400df904
--- /dev/null
+++ b/test/e2e/test_setting.py
@@ -0,0 +1,12 @@
+"""
+setting APIのテスト
+"""
+
+from fastapi.testclient import TestClient
+from syrupy.assertion import SnapshotAssertion
+
+
+def test_setting画面が取得できる(client: TestClient, snapshot: SnapshotAssertion) -> None:
+    response = client.get("/setting")  # GET the settings page
+    assert response.status_code == 200
+    assert snapshot == response.content.decode("utf-8")  # compare with saved snapshot

From 1c60a67f82234a375c4d08032af427f16687c25b Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 21 Feb 2024 13:30:40 +0900
Subject: [PATCH 172/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20e2e=20`/engine=5F?=
 =?UTF-8?q?manifest`=20=E3=83=86=E3=82=B9=E3=83=88=E3=81=AE=E8=BF=BD?=
 =?UTF-8?q?=E5=8A=A0=20(#1058)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add: e2e `/engine_manifest` テスト

* fix: lint

* use hash_long_string

---------

Co-authored-by: Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp>
---
 ...\343\201\247\343\201\215\343\202\213.json" | 181 ++++++++++++++++++
 test/e2e/test_engine_manifest.py              |  14 ++
 2 files changed, 195 insertions(+)
 create mode 100644 "test/e2e/__snapshots__/test_engine_manifest/test_\343\202\250\343\203\263\343\202\270\343\203\263\343\203\236\343\203\213\343\203\225\343\202\247\343\202\271\343\203\210\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json"
 create mode 100644 test/e2e/test_engine_manifest.py

diff --git "a/test/e2e/__snapshots__/test_engine_manifest/test_\343\202\250\343\203\263\343\202\270\343\203\263\343\203\236\343\203\213\343\203\225\343\202\247\343\202\271\343\203\210\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json" "b/test/e2e/__snapshots__/test_engine_manifest/test_\343\202\250\343\203\263\343\202\270\343\203\263\343\203\236\343\203\213\343\203\225\343\202\247\343\202\271\343\203\210\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json"
new file mode 100644
index 000000000..d69263afd
--- /dev/null
+++ "b/test/e2e/__snapshots__/test_engine_manifest/test_\343\202\250\343\203\263\343\202\270\343\203\263\343\203\236\343\203\213\343\203\225\343\202\247\343\202\271\343\203\210\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json"
@@ -0,0 +1,181 @@
+{
+  "brand_name": "DUMMY",
+  "default_sampling_rate": 24000,
+  "dependency_licenses": [
+    {
+      "license": "dummy license",
+      "name": "dummy library",
+      "text": "dummy license text",
+      "version": "0.0.1"
+    }
+  ],
+  "frame_rate": 93.75,
+  "icon": "MD5:f957eb4f5daedccb4eb6a5170f384bf4",
+  "manifest_version": "0.13.1",
+  "name": "DUMMY Engine",
+  "supported_features": {
+    "adjust_intonation_scale": true,
+    "adjust_mora_pitch": true,
+    "adjust_phoneme_length": true,
+    "adjust_pitch_scale": true,
+    "adjust_speed_scale": true,
+    "adjust_volume_scale": true,
+    "interrogative_upspeak": true,
+    "manage_library": true,
+    "sing": true,
+    "synthesis_morphing": true
+  },
+  "supported_vvlib_manifest_version": null,
+  "terms_of_service": "dummy teams of service",
+  "update_infos": [
+    {
+      "contributors": [
+        "Hiroshiba",
+        "y-chan"
+      ],
+      "descriptions": [
+        "ソングAPIを追加",
+        "キャラクター「四国めたん」「ずんだもん」「春日部つむぎ」「雨晴はう」「波音リツ」のハミングを追加",
+        "キャラクター「波音リツ」のソングを追加"
+      ],
+      "version": "0.16.0"
+    },
+    {
+      "contributors": [],
+      "descriptions": [
+        "ビルド成果物のディレクトリ構造を元に戻した"
+      ],
+      "version": "0.15.1"
+    },
+    {
+      "contributors": [
+        "aoirint",
+        "FujisakiEx",
+        "Hiroshiba",
+        "K-shir0",
+        "My-MC",
+        "nagi-miaow",
+        "okaits",
+        "raa0121",
+        "sabonerune",
+        "sevenc-nanashi",
+        "siketyan",
+        "stmtk1",
+        "takana-v",
+        "tarepan",
+        "tomoish",
+        "tuna2134",
+        "weweweok",
+        "whiteball",
+        "y-chan"
+      ],
+      "descriptions": [
+        "/validate_kana APIを追加",
+        "起動時のエンジン設定項目追加",
+        "ユーザー辞書のインポート・エクスポート機能追加",
+        "ビルド成果物のディレクトリ構造を変更",
+        "書き込み系APIを一括で無効化可能に",
+        "開発環境の向上",
+        "バグ修正"
+      ],
+      "version": "0.15.0"
+    },
+    {
+      "contributors": [],
+      "descriptions": [
+        "キャラクター「小夜」「ずんだもん」「もち子さん」「青山龍星」のスタイルを追加・更新"
+      ],
+      "version": "0.14.7"
+    },
+    {
+      "contributors": [],
+      "descriptions": [
+        "キャラクター「栗田まろん」「あいえるたん」「満別花丸」「琴詠ニア」を追加"
+      ],
+      "version": "0.14.6"
+    },
+    {
+      "contributors": [],
+      "descriptions": [
+        "キャラクター「中国うさぎ」を追加",
+        "キャラクター「波音リツ」「もち子さん」のスタイルを追加"
+      ],
+      "version": "0.14.5"
+    },
+    {
+      "contributors": [
+        "Hiroshiba"
+      ],
+      "descriptions": [
+        "キャラクター「春歌ナナ」「猫使アル」「猫使ビィ」を追加",
+        "バグ修正"
+      ],
+      "version": "0.14.4"
+    },
+    {
+      "contributors": [
+        "Hiroshiba"
+      ],
+      "descriptions": [
+        "キャラクター「†聖騎士 紅桜†」「雀松朱司」「麒ヶ島宗麟」を追加",
+        "同時書き込みで辞書が破損する問題を修正"
+      ],
+      "version": "0.14.3"
+    },
+    {
+      "contributors": [],
+      "descriptions": [
+        "DirectML版の生成が遅い問題を修正"
+      ],
+      "version": "0.14.2"
+    },
+    {
+      "contributors": [],
+      "descriptions": [
+        "AquesTalkライクな記法で生成した音声のバグを修正"
+      ],
+      "version": "0.14.1"
+    },
+    {
+      "contributors": [
+        "aoirint",
+        "Appletigerv",
+        "haru3me",
+        "Hiroshiba",
+        "ksk001100",
+        "masinc",
+        "misogihagi",
+        "My-MC",
+        "nebocco",
+        "PickledChair",
+        "qryxip",
+        "qwerty2501",
+        "sabonerune",
+        "sarisia",
+        "Segu-g",
+        "sevenc-nanashi",
+        "shigobu",
+        "smly",
+        "takana-v",
+        "ts-klassen",
+        "whiteball",
+        "y-chan"
+      ],
+      "descriptions": [
+        "コアをRust言語に移行",
+        "セキュリティアップデート",
+        "スタイルごとに異なる立ち絵の提供を可能に",
+        "VVPPファイルの提供",
+        "設定GUIの提供",
+        "プリセットの保存",
+        "モーフィングAPIの仕様変更",
+        "DirectML利用時に適したGPUを自動選択",
+        "開発環境の向上",
+        "バグ修正"
+      ],
+      "version": "0.14.0"
+    }
+  ],
+  "url": "https://github.com/VOICEVOX/voicevox_engine",
+  "uuid": "c7b58856-bd56-4aa1-afb7-b8415f824b06"
+}
diff --git a/test/e2e/test_engine_manifest.py b/test/e2e/test_engine_manifest.py
new file mode 100644
index 000000000..664fa93dc
--- /dev/null
+++ b/test/e2e/test_engine_manifest.py
@@ -0,0 +1,14 @@
+"""
+/engine_manifest APIのテスト
+"""
+
+from test.utility import hash_long_string
+
+from fastapi.testclient import TestClient
+from syrupy.assertion import SnapshotAssertion
+
+
+def test_エンジンマニフェストを取得できる(client: TestClient, snapshot_json: SnapshotAssertion) -> None:
+    response = client.get("/engine_manifest")  # GET the engine manifest
+    assert response.status_code == 200
+    assert snapshot_json == hash_long_string(response.json())  # long strings hashed

From 8749c3e68dbc9b9686d6219770caa64e9ce7da0b Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 21 Feb 2024 19:14:02 +0900
Subject: [PATCH 173/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`tts=5Fpipeline`?=
 =?UTF-8?q?=20=E3=82=B5=E3=83=96=E3=83=A2=E3=82=B8=E3=83=A5=E3=83=BC?=
 =?UTF-8?q?=E3=83=AB=E3=83=AA=E3=83=8D=E3=83=BC=E3=83=A0=20(#1062)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

refactor: `tts_pipeline` サブモジュールリネーム
---
 test/tts_pipeline/{test_mora_list.py => test_mora_mapping.py} | 2 +-
 .../{test_acoustic_feature_extractor.py => test_phoneme.py}   | 2 +-
 voicevox_engine/tts_pipeline/kana_converter.py                | 4 ++--
 .../tts_pipeline/{mora_list.py => mora_mapping.py}            | 2 +-
 .../{acoustic_feature_extractor.py => phoneme.py}             | 0
 voicevox_engine/tts_pipeline/text_analyzer.py                 | 2 +-
 voicevox_engine/tts_pipeline/tts_engine.py                    | 4 ++--
 7 files changed, 8 insertions(+), 8 deletions(-)
 rename test/tts_pipeline/{test_mora_list.py => test_mora_mapping.py} (91%)
 rename test/tts_pipeline/{test_acoustic_feature_extractor.py => test_phoneme.py} (96%)
 rename voicevox_engine/tts_pipeline/{mora_list.py => mora_mapping.py} (99%)
 rename voicevox_engine/tts_pipeline/{acoustic_feature_extractor.py => phoneme.py} (100%)

diff --git a/test/tts_pipeline/test_mora_list.py b/test/tts_pipeline/test_mora_mapping.py
similarity index 91%
rename from test/tts_pipeline/test_mora_list.py
rename to test/tts_pipeline/test_mora_mapping.py
index 5cc497aeb..0791c0ef4 100644
--- a/test/tts_pipeline/test_mora_list.py
+++ b/test/tts_pipeline/test_mora_mapping.py
@@ -1,6 +1,6 @@
 from unittest import TestCase
 
-from voicevox_engine.tts_pipeline.mora_list import mora_phonemes_to_mora_kana
+from voicevox_engine.tts_pipeline.mora_mapping import mora_phonemes_to_mora_kana
 
 
 class TestOpenJTalkMoraList(TestCase):
diff --git a/test/tts_pipeline/test_acoustic_feature_extractor.py b/test/tts_pipeline/test_phoneme.py
similarity index 96%
rename from test/tts_pipeline/test_acoustic_feature_extractor.py
rename to test/tts_pipeline/test_phoneme.py
index 679e981c1..cc6dd8315 100644
--- a/test/tts_pipeline/test_acoustic_feature_extractor.py
+++ b/test/tts_pipeline/test_phoneme.py
@@ -2,7 +2,7 @@
 
 import pytest
 
-from voicevox_engine.tts_pipeline.acoustic_feature_extractor import Phoneme
+from voicevox_engine.tts_pipeline.phoneme import Phoneme
 
 TRUE_NUM_PHONEME = 45
 
diff --git a/voicevox_engine/tts_pipeline/kana_converter.py b/voicevox_engine/tts_pipeline/kana_converter.py
index c49ab8461..5310701cf 100644
--- a/voicevox_engine/tts_pipeline/kana_converter.py
+++ b/voicevox_engine/tts_pipeline/kana_converter.py
@@ -17,8 +17,8 @@
 from typing import List, Optional
 
 from ..model import AccentPhrase, Mora, ParseKanaError, ParseKanaErrorCode
-from .acoustic_feature_extractor import Vowel
-from .mora_list import mora_kana_to_mora_phonemes
+from .mora_mapping import mora_kana_to_mora_phonemes
+from .phoneme import Vowel
 
 _LOOP_LIMIT = 300
 
diff --git a/voicevox_engine/tts_pipeline/mora_list.py b/voicevox_engine/tts_pipeline/mora_mapping.py
similarity index 99%
rename from voicevox_engine/tts_pipeline/mora_list.py
rename to voicevox_engine/tts_pipeline/mora_mapping.py
index c10931ae2..6a36775f2 100644
--- a/voicevox_engine/tts_pipeline/mora_list.py
+++ b/voicevox_engine/tts_pipeline/mora_mapping.py
@@ -44,7 +44,7 @@
 
 from typing import Literal
 
-from .acoustic_feature_extractor import BaseVowel, Consonant
+from .phoneme import BaseVowel, Consonant
 
 # AquesTalk 風記法で記述されるモーラ（無声化 `_` を除く）
 _MoraKana = Literal[
diff --git a/voicevox_engine/tts_pipeline/acoustic_feature_extractor.py b/voicevox_engine/tts_pipeline/phoneme.py
similarity index 100%
rename from voicevox_engine/tts_pipeline/acoustic_feature_extractor.py
rename to voicevox_engine/tts_pipeline/phoneme.py
diff --git a/voicevox_engine/tts_pipeline/text_analyzer.py b/voicevox_engine/tts_pipeline/text_analyzer.py
index 37fce007b..cbb9736e8 100644
--- a/voicevox_engine/tts_pipeline/text_analyzer.py
+++ b/voicevox_engine/tts_pipeline/text_analyzer.py
@@ -6,7 +6,7 @@
 import pyopenjtalk
 
 from ..model import AccentPhrase, Mora
-from .mora_list import mora_phonemes_to_mora_kana
+from .mora_mapping import mora_phonemes_to_mora_kana
 
 OjtVowel = Literal[
     "A", "E", "I", "N", "O", "U", "a", "cl", "e", "i", "o", "pau", "sil", "u"
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index 17d06c3e4..f80fcca13 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -10,9 +10,9 @@
 from ..core.core_wrapper import CoreWrapper
 from ..metas.Metas import StyleId
 from ..model import AccentPhrase, AudioQuery, FrameAudioQuery, FramePhoneme, Mora, Score
-from .acoustic_feature_extractor import Phoneme
 from .kana_converter import parse_kana
-from .mora_list import mora_kana_to_mora_phonemes, mora_phonemes_to_mora_kana
+from .mora_mapping import mora_kana_to_mora_phonemes, mora_phonemes_to_mora_kana
+from .phoneme import Phoneme
 from .text_analyzer import text_to_accent_phrases
 
 # 疑問文語尾定数

From 709527be089c0410c08e989df95a4a1d78439423 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 21 Feb 2024 19:32:29 +0900
Subject: [PATCH 174/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`dict`=20?=
 =?UTF-8?q?=E3=83=A2=E3=82=B8=E3=83=A5=E3=83=BC=E3=83=AB=E3=81=B8=E3=81=AE?=
 =?UTF-8?q?=E7=A7=BB=E6=A4=8D=20(#1061)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: `dict` モジュールへ移植

* fix: lint

* fix: モジュール名

* fix: リネーム忘れ
---
 build_util/get_cost_candidates.py                      | 2 +-
 run.py                                                 | 4 ++--
 test/user_dict/test_user_dict.py                       | 7 +++++--
 test/user_dict/test_word_types.py                      | 2 +-
 voicevox_engine/{ => user_dict}/part_of_speech_data.py | 2 +-
 voicevox_engine/{ => user_dict}/user_dict.py           | 6 +++---
 6 files changed, 13 insertions(+), 10 deletions(-)
 rename voicevox_engine/{ => user_dict}/part_of_speech_data.py (99%)
 rename voicevox_engine/{ => user_dict}/user_dict.py (99%)

diff --git a/build_util/get_cost_candidates.py b/build_util/get_cost_candidates.py
index 785a0c4df..2eabb4eeb 100644
--- a/build_util/get_cost_candidates.py
+++ b/build_util/get_cost_candidates.py
@@ -1,5 +1,5 @@
 """
-voicevox_engine/part_of_speech_data.pyのcost_candidatesを計算するプログラムです。
+voicevox_engine/user_dict/part_of_speech_data.pyのcost_candidatesを計算するプログラムです。
 引数のnaist_jdic_pathには、open_jtalkのsrc/mecab-naist-jdic/naist-jdic.csvを指定してください。
 
 実行例:
diff --git a/run.py b/run.py
index 9037d2d85..3dc1f23bb 100644
--- a/run.py
+++ b/run.py
@@ -67,7 +67,6 @@
 from voicevox_engine.morphing import (
     synthesis_morphing_parameter as _synthesis_morphing_parameter,
 )
-from voicevox_engine.part_of_speech_data import MAX_PRIORITY, MIN_PRIORITY
 from voicevox_engine.preset.Preset import Preset
 from voicevox_engine.preset.PresetError import PresetError
 from voicevox_engine.preset.PresetManager import PresetManager
@@ -78,7 +77,8 @@
     TTSEngine,
     make_tts_engines_from_cores,
 )
-from voicevox_engine.user_dict import (
+from voicevox_engine.user_dict.part_of_speech_data import MAX_PRIORITY, MIN_PRIORITY
+from voicevox_engine.user_dict.user_dict import (
     apply_word,
     delete_word,
     import_user_dict,
diff --git a/test/user_dict/test_user_dict.py b/test/user_dict/test_user_dict.py
index f14b5432f..f284b74e3 100644
--- a/test/user_dict/test_user_dict.py
+++ b/test/user_dict/test_user_dict.py
@@ -8,8 +8,11 @@
 from pyopenjtalk import g2p, unset_user_dict
 
 from voicevox_engine.model import UserDictWord, WordTypes
-from voicevox_engine.part_of_speech_data import MAX_PRIORITY, part_of_speech_data
-from voicevox_engine.user_dict import (
+from voicevox_engine.user_dict.part_of_speech_data import (
+    MAX_PRIORITY,
+    part_of_speech_data,
+)
+from voicevox_engine.user_dict.user_dict import (
     _create_word,
     apply_word,
     delete_word,
diff --git a/test/user_dict/test_word_types.py b/test/user_dict/test_word_types.py
index 1f2635b68..e26ce192f 100644
--- a/test/user_dict/test_word_types.py
+++ b/test/user_dict/test_word_types.py
@@ -1,7 +1,7 @@
 from unittest import TestCase
 
 from voicevox_engine.model import WordTypes
-from voicevox_engine.part_of_speech_data import part_of_speech_data
+from voicevox_engine.user_dict.part_of_speech_data import part_of_speech_data
 
 
 class TestWordTypes(TestCase):
diff --git a/voicevox_engine/part_of_speech_data.py b/voicevox_engine/user_dict/part_of_speech_data.py
similarity index 99%
rename from voicevox_engine/part_of_speech_data.py
rename to voicevox_engine/user_dict/part_of_speech_data.py
index 8950e47c8..302b61381 100644
--- a/voicevox_engine/part_of_speech_data.py
+++ b/voicevox_engine/user_dict/part_of_speech_data.py
@@ -1,6 +1,6 @@
 from typing import Dict
 
-from .model import (
+from ..model import (
     USER_DICT_MAX_PRIORITY,
     USER_DICT_MIN_PRIORITY,
     PartOfSpeechDetail,
diff --git a/voicevox_engine/user_dict.py b/voicevox_engine/user_dict/user_dict.py
similarity index 99%
rename from voicevox_engine/user_dict.py
rename to voicevox_engine/user_dict/user_dict.py
index ed05c7615..ae4776893 100644
--- a/voicevox_engine/user_dict.py
+++ b/voicevox_engine/user_dict/user_dict.py
@@ -10,10 +10,10 @@
 import pyopenjtalk
 from fastapi import HTTPException
 
-from .model import UserDictWord, WordTypes
+from ..model import UserDictWord, WordTypes
+from ..utility.mutex_utility import mutex_wrapper
+from ..utility.path_utility import engine_root, get_save_dir
 from .part_of_speech_data import MAX_PRIORITY, MIN_PRIORITY, part_of_speech_data
-from .utility.mutex_utility import mutex_wrapper
-from .utility.path_utility import engine_root, get_save_dir
 
 root_dir = engine_root()
 save_dir = get_save_dir()

From 346e1f7333fd5fdb5c43902e92bf2ac3f082ea1f Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Wed, 21 Feb 2024 19:36:36 +0900
Subject: [PATCH 175/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`SettingLoader`?=
 =?UTF-8?q?=20=E3=83=AA=E3=83=8D=E3=83=BC=E3=83=A0=20(#1060)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: `SettingLoader` リネーム

* fix: typo

* fix: メソッド名
---
 build_util/make_docs.py                  |  4 ++--
 run.py                                   | 12 ++++++------
 test/e2e/conftest.py                     |  4 ++--
 test/setting/test_setting.py             | 24 ++++++++++++------------
 voicevox_engine/setting/SettingLoader.py |  9 +++++----
 5 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/build_util/make_docs.py b/build_util/make_docs.py
index e36295bf6..14bf53f75 100644
--- a/build_util/make_docs.py
+++ b/build_util/make_docs.py
@@ -4,7 +4,7 @@
 from voicevox_engine.dev.core.mock import MockCoreWrapper
 from voicevox_engine.dev.tts_engine.mock import MockTTSEngine
 from voicevox_engine.preset.PresetManager import PresetManager
-from voicevox_engine.setting.SettingLoader import USER_SETTING_PATH, SettingLoader
+from voicevox_engine.setting.SettingLoader import USER_SETTING_PATH, SettingHandler
 from voicevox_engine.tts_pipeline.tts_engine import CoreAdapter
 from voicevox_engine.utility.path_utility import engine_root
 
@@ -41,7 +41,7 @@ def generate_api_docs_html(schema: str) -> str:
         tts_engines={"mock": MockTTSEngine()},
         cores={"mock": CoreAdapter(mock_core)},
         latest_core_version="mock",
-        setting_loader=SettingLoader(USER_SETTING_PATH),
+        setting_loader=SettingHandler(USER_SETTING_PATH),
         preset_manager=PresetManager(  # FIXME: impl MockPresetManager
             preset_path=engine_root() / "presets.yaml",
         ),
diff --git a/run.py b/run.py
index 3dc1f23bb..34671965a 100644
--- a/run.py
+++ b/run.py
@@ -71,7 +71,7 @@
 from voicevox_engine.preset.PresetError import PresetError
 from voicevox_engine.preset.PresetManager import PresetManager
 from voicevox_engine.setting.Setting import CorsPolicyMode, Setting
-from voicevox_engine.setting.SettingLoader import USER_SETTING_PATH, SettingLoader
+from voicevox_engine.setting.SettingLoader import USER_SETTING_PATH, SettingHandler
 from voicevox_engine.tts_pipeline.kana_converter import create_kana, parse_kana
 from voicevox_engine.tts_pipeline.tts_engine import (
     TTSEngine,
@@ -135,7 +135,7 @@ def generate_app(
     tts_engines: dict[str, TTSEngine],
     cores: dict[str, CoreAdapter],
     latest_core_version: str,
-    setting_loader: SettingLoader,
+    setting_loader: SettingHandler,
     preset_manager: PresetManager,
     cancellable_engine: CancellableEngine | None = None,
     root_dir: Optional[Path] = None,
@@ -1310,7 +1310,7 @@ def setting_get(request: Request) -> Response:
         """
         設定ページを返します。
         """
-        settings = setting_loader.load_setting_file()
+        settings = setting_loader.load()
 
         brand_name = engine_manifest_data.brand_name
         cors_policy_mode = settings.cors_policy_mode
@@ -1348,7 +1348,7 @@ def setting_post(
         )
 
         # 更新した設定へ上書き
-        setting_loader.dump_setting_file(settings)
+        setting_loader.save(settings)
 
         return Response(status_code=204)
 
@@ -1552,9 +1552,9 @@ def main() -> None:
     if root_dir is None:
         root_dir = engine_root()
 
-    setting_loader = SettingLoader(args.setting_file)
+    setting_loader = SettingHandler(args.setting_file)
 
-    settings = setting_loader.load_setting_file()
+    settings = setting_loader.load()
 
     cors_policy_mode: CorsPolicyMode | None = args.cors_policy_mode
     if cors_policy_mode is None:
diff --git a/test/e2e/conftest.py b/test/e2e/conftest.py
index a2979e3ba..df5bb1744 100644
--- a/test/e2e/conftest.py
+++ b/test/e2e/conftest.py
@@ -7,7 +7,7 @@
 
 from voicevox_engine.core.core_initializer import initialize_cores
 from voicevox_engine.preset.PresetManager import PresetManager
-from voicevox_engine.setting.SettingLoader import SettingLoader
+from voicevox_engine.setting.SettingLoader import SettingHandler
 from voicevox_engine.tts_pipeline.tts_engine import make_tts_engines_from_cores
 from voicevox_engine.utility.core_version_utility import get_latest_core_version
 
@@ -17,7 +17,7 @@ def app_params():
     cores = initialize_cores(use_gpu=False, enable_mock=True)
     tts_engines = make_tts_engines_from_cores(cores)
     latest_core_version = get_latest_core_version(versions=list(tts_engines.keys()))
-    setting_loader = SettingLoader(Path("./not_exist.yaml"))
+    setting_loader = SettingHandler(Path("./not_exist.yaml"))
     preset_manager = PresetManager(  # FIXME: impl MockPresetManager
         preset_path=Path("./presets.yaml"),
     )
diff --git a/test/setting/test_setting.py b/test/setting/test_setting.py
index 0a05083ba..468e76b11 100644
--- a/test/setting/test_setting.py
+++ b/test/setting/test_setting.py
@@ -3,7 +3,7 @@
 from unittest import TestCase
 
 from voicevox_engine.setting.Setting import CorsPolicyMode, Setting
-from voicevox_engine.setting.SettingLoader import SettingLoader
+from voicevox_engine.setting.SettingLoader import SettingHandler
 
 
 class TestSettingLoader(TestCase):
@@ -12,8 +12,8 @@ def setUp(self):
         self.tmp_dir_path = Path(self.tmp_dir.name)
 
     def test_loading_1(self):
-        setting_loader = SettingLoader(Path("not_exist.yaml"))
-        settings = setting_loader.load_setting_file()
+        setting_loader = SettingHandler(Path("not_exist.yaml"))
+        settings = setting_loader.load()
 
         self.assertEqual(
             settings.dict(),
@@ -21,10 +21,10 @@ def test_loading_1(self):
         )
 
     def test_loading_2(self):
-        setting_loader = SettingLoader(
+        setting_loader = SettingHandler(
             setting_file_path=Path("test/setting/setting-test-load-1.yaml")
         )
-        settings = setting_loader.load_setting_file()
+        settings = setting_loader.load()
 
         self.assertEqual(
             settings.dict(),
@@ -32,10 +32,10 @@ def test_loading_2(self):
         )
 
     def test_loading_3(self):
-        setting_loader = SettingLoader(
+        setting_loader = SettingHandler(
             setting_file_path=Path("test/setting/setting-test-load-2.yaml")
         )
-        settings = setting_loader.load_setting_file()
+        settings = setting_loader.load()
 
         self.assertEqual(
             settings.dict(),
@@ -43,10 +43,10 @@ def test_loading_3(self):
         )
 
     def test_loading_4(self):
-        setting_loader = SettingLoader(
+        setting_loader = SettingHandler(
             setting_file_path=Path("test/setting/setting-test-load-3.yaml")
         )
-        settings = setting_loader.load_setting_file()
+        settings = setting_loader.load()
 
         self.assertEqual(
             settings.dict(),
@@ -57,15 +57,15 @@ def test_loading_4(self):
         )
 
     def test_dump(self):
-        setting_loader = SettingLoader(
+        setting_loader = SettingHandler(
             setting_file_path=Path(self.tmp_dir_path / "setting-test-dump.yaml")
         )
         settings = Setting(cors_policy_mode=CorsPolicyMode.localapps)
-        setting_loader.dump_setting_file(settings)
+        setting_loader.save(settings)
 
         self.assertTrue(setting_loader.setting_file_path.is_file())
         self.assertEqual(
-            setting_loader.load_setting_file().dict(),
+            setting_loader.load().dict(),
             {"allow_origin": None, "cors_policy_mode": CorsPolicyMode.localapps},
         )
 
diff --git a/voicevox_engine/setting/SettingLoader.py b/voicevox_engine/setting/SettingLoader.py
index 0e7e5ca90..da6e82219 100644
--- a/voicevox_engine/setting/SettingLoader.py
+++ b/voicevox_engine/setting/SettingLoader.py
@@ -8,7 +8,7 @@
 USER_SETTING_PATH: Path = get_save_dir() / "setting.yml"
 
 
-class SettingLoader:
+class SettingHandler:
     def __init__(self, setting_file_path: Path) -> None:
         """
         設定ファイルの管理
@@ -19,8 +19,8 @@ def __init__(self, setting_file_path: Path) -> None:
         """
         self.setting_file_path = setting_file_path
 
-    def load_setting_file(self) -> Setting:
-        # 設定値の読み込み
+    def load(self) -> Setting:
+        """設定値をファイルから読み込む。"""
         if not self.setting_file_path.is_file():
             # 設定ファイルが存在しないためデフォルト値を取得
             setting = {"allow_origin": None, "cors_policy_mode": "localapps"}
@@ -34,7 +34,8 @@ def load_setting_file(self) -> Setting:
             allow_origin=setting["allow_origin"],
         )
 
-    def dump_setting_file(self, settings: Setting) -> None:
+    def save(self, settings: Setting) -> None:
+        """設定値をファイルへ書き込む。"""
         settings_dict = settings.dict()
 
         with open(self.setting_file_path, mode="w", encoding="utf-8") as f:

From 2ed3e94ee846828cc50aaef0b0c4d6296fb10fe8 Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sat, 24 Feb 2024 02:38:59 +0900
Subject: [PATCH 176/177] =?UTF-8?q?refactor:=20Windows=E3=82=B3=E3=82=A2?=
 =?UTF-8?q?=E8=A7=A3=E5=87=8D=20by=20unzip=20(#1068)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/build.yml | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 470a14ac2..06501d043 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -420,14 +420,7 @@ jobs:
           VOICEVOX_CORE_ASSET_NAME: ${{ matrix.voicevox_core_asset_prefix }}-${{ env.VOICEVOX_CORE_VERSION }}
         run: |
           curl -L "https://github.com/VOICEVOX/voicevox_core/releases/download/${{ env.VOICEVOX_CORE_VERSION }}/${{ env.VOICEVOX_CORE_ASSET_NAME }}.zip" > download/${{ env.VOICEVOX_CORE_ASSET_NAME }}.zip
-          # NOTE: Windows 版コアのみ PowerShell の Compress-Archive コマンドレットを用いて zip を作成している（デフォルト状態では zip コマンドが存在していないため）。
-          #       このコマンドはバージョンによっては作成した zip 内のパスの区切り文字がバックスラッシュになる。 (cf. https://github.com/PowerShell/Microsoft.PowerShell.Archive/issues/48)
-          #       unzip コマンドはこのような zip ファイルを解凍できるものの、終了コード 1 を報告して CI が落ちてしまう。
-          #       回避策として、unzip コマンドの代わりに 7z コマンドを用いて zip ファイルを解凍する。
-          # unzip download/${{ env.VOICEVOX_CORE_ASSET_NAME }}.zip -d download/
-          if [[ ${{ matrix.os }} == windows-* ]]; then
-            7z x -o"download" download/${{ env.VOICEVOX_CORE_ASSET_NAME }}.zip
-          elif [[ ${{ matrix.os }} == mac-* ]]; then
+          if [[ ${{ matrix.os }} == mac-* ]]; then
             ditto -x -k --sequesterRsrc --rsrc download/${{ env.VOICEVOX_CORE_ASSET_NAME }}.zip download/
           else
             unzip download/${{ env.VOICEVOX_CORE_ASSET_NAME }}.zip -d download/ 

From f181411ec69812296989d9cc583826c22eec87ae Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Sat, 24 Feb 2024 02:57:09 +0900
Subject: [PATCH 177/177] =?UTF-8?q?=E6=95=B4=E7=90=86:=20MacOS=20=E3=83=86?=
 =?UTF-8?q?=E3=82=B9=E3=83=88=E8=BF=BD=E5=8A=A0=20(#1069)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

refactor: MacOS テスト追加
---
 .github/workflows/test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 3666063d5..3dfeb478c 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -16,7 +16,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-20.04, windows-latest] # [ubuntu-20.04, macos-latest, windows-latest]
+        os: [ubuntu-20.04, macos-latest, windows-latest]
         python: ["3.11.3"]
 
     steps: