Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: AudioQueryのJSON表現をENGINEと同じにする #946

Merged
merged 11 commits into from
Feb 3, 2025
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ futures-core = "0.3.31"
futures-util = "0.3.31"
futures-lite = "2.3.0"
futures-io = "0.3.31"
heck = "0.4.1"
humansize = "2.1.3"
indexmap = "2.6.0"
indicatif = "0.17.8"
Expand Down
1 change: 0 additions & 1 deletion crates/voicevox_core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ voicevox-ort = { workspace = true, features = ["download-binaries", "__init-for-
voicevox_core_macros.workspace = true

[dev-dependencies]
heck.workspace = true
pollster = { workspace = true, features = ["macro"] }
pretty_assertions.workspace = true
rstest.workspace = true
Expand Down
103 changes: 36 additions & 67 deletions crates/voicevox_core/src/engine/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,32 +55,42 @@ pub struct AudioQuery {
/// アクセント句の配列。
pub accent_phrases: Vec<AccentPhrase>,
/// 全体の話速。
#[serde(rename = "speedScale")]
pub speed_scale: f32,
/// 全体の音高。
#[serde(rename = "pitchScale")]
pub pitch_scale: f32,
/// 全体の抑揚。
#[serde(rename = "intonationScale")]
pub intonation_scale: f32,
/// 全体の音量。
#[serde(rename = "volumeScale")]
pub volume_scale: f32,
/// 音声の前の無音時間。
#[serde(rename = "prePhonemeLength")]
pub pre_phoneme_length: f32,
/// 音声の後の無音時間。
#[serde(rename = "postPhonemeLength")]
pub post_phoneme_length: f32,
/// 音声データの出力サンプリングレート。
#[serde(rename = "outputSamplingRate")]
pub output_sampling_rate: u32,
/// 音声データをステレオ出力するか否か。
#[serde(rename = "outputStereo")]
pub output_stereo: bool,
// TODO: VOICEVOX/voicevox_engine#1308 を実装する
/// 句読点などの無音時間。`null`のときは無視される。デフォルト値は`null`。
#[serde(
default,
rename = "pauseLength",
deserialize_with = "deserialize_pause_length",
serialize_with = "serialize_pause_length"
)]
pub pause_length: (),
/// 読点などの無音時間(倍率)。デフォルト値は`1`。
#[serde(
default,
rename = "pauseLengthScale",
deserialize_with = "deserialize_pause_length_scale",
serialize_with = "serialize_pause_length_scale"
)]
Expand Down Expand Up @@ -183,52 +193,11 @@ impl AudioQuery {

#[cfg(test)]
mod tests {
use pretty_assertions::assert_eq;
use rstest::rstest;
use serde_json::json;

use super::AudioQuery;

/// Serializes a placeholder-valued `AudioQuery` to JSON and asserts, via
/// `check_json_field_snake_case`, that every emitted key is snake_case.
#[rstest]
fn check_audio_query_model_json_field_snake_case() {
    let serialized = serde_json::to_value(AudioQuery {
        accent_phrases: vec![],
        speed_scale: 0.0,
        pitch_scale: 0.0,
        intonation_scale: 0.0,
        volume_scale: 0.0,
        pre_phoneme_length: 0.0,
        post_phoneme_length: 0.0,
        output_sampling_rate: 0,
        output_stereo: false,
        pause_length: (),
        pause_length_scale: (),
        kana: None,
    })
    .unwrap();

    check_json_field_snake_case(&serialized);
}

fn check_json_field_snake_case(val: &serde_json::Value) {
use serde_json::Value::*;

match val {
Object(obj) => {
for (k, v) in obj.iter() {
use heck::ToSnakeCase as _;
assert_eq!(k.to_snake_case(), *k, "should be snake case {k}");
check_json_field_snake_case(v);
}
}
Array(array) => {
for val in array.iter() {
check_json_field_snake_case(val);
}
}
_ => {}
}
}

#[rstest]
fn it_accepts_json_without_optional_fields() -> anyhow::Result<()> {
serde_json::from_value::<AudioQuery>(json!({
Expand All @@ -245,14 +214,14 @@ mod tests {
"accent": 1
}
],
"speed_scale": 1.0,
"pitch_scale": 0.0,
"intonation_scale": 1.0,
"volume_scale": 1.0,
"pre_phoneme_length": 0.1,
"post_phoneme_length": 0.1,
"output_sampling_rate": 24000,
"output_stereo": false
"speedScale": 1.0,
"pitchScale": 0.0,
"intonationScale": 1.0,
"volumeScale": 1.0,
"prePhonemeLength": 0.1,
"postPhonemeLength": 0.1,
"outputSamplingRate": 24000,
"outputStereo": false
}))?;
Ok(())
}
Expand All @@ -262,15 +231,15 @@ mod tests {
fn it_denies_non_null_for_pause_length() {
serde_json::from_value::<AudioQuery>(json!({
"accent_phrases": [],
"speed_scale": 1.0,
"pitch_scale": 0.0,
"intonation_scale": 1.0,
"volume_scale": 1.0,
"pre_phoneme_length": 0.1,
"post_phoneme_length": 0.1,
"output_sampling_rate": 24000,
"output_stereo": false,
"pause_length": "aaaaa"
"speedScale": 1.0,
"pitchScale": 0.0,
"intonationScale": 1.0,
"volumeScale": 1.0,
"prePhonemeLength": 0.1,
"postPhonemeLength": 0.1,
"outputSamplingRate": 24000,
"outputStereo": false,
"pauseLength": "aaaaa"
}))
.map(|_| ())
.unwrap_err();
Expand All @@ -281,15 +250,15 @@ mod tests {
fn it_denies_non_float_for_pause_length_scale() {
serde_json::from_value::<AudioQuery>(json!({
"accent_phrases": [],
"speed_scale": 1.0,
"pitch_scale": 0.0,
"intonation_scale": 1.0,
"volume_scale": 1.0,
"pre_phoneme_length": 0.1,
"post_phoneme_length": 0.1,
"output_sampling_rate": 24000,
"output_stereo": false,
"pause_length_scale": "aaaaa",
"speedScale": 1.0,
"pitchScale": 0.0,
"intonationScale": 1.0,
"volumeScale": 1.0,
"prePhonemeLength": 0.1,
"postPhonemeLength": 0.1,
"outputSamplingRate": 24000,
"outputStereo": false,
"pauseLengthScale": "aaaaa",
}))
.map(|_| ())
.unwrap_err();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,66 +16,42 @@ public class AudioQuery {
public List<AccentPhrase> accentPhrases;

/** 全体の話速。 */
@SerializedName("speed_scale")
@Expose
public double speedScale;
@Expose public double speedScale;

/** 全体の音高。 */
@SerializedName("pitch_scale")
@Expose
public double pitchScale;
@Expose public double pitchScale;

/** 全体の抑揚。 */
@SerializedName("intonation_scale")
@Expose
public double intonationScale;
@Expose public double intonationScale;

/** 全体の音量。 */
@SerializedName("volume_scale")
@Expose
public double volumeScale;
@Expose public double volumeScale;

/** 音声の前の無音時間。 */
@SerializedName("pre_phoneme_length")
@Expose
public double prePhonemeLength;
@Expose public double prePhonemeLength;

/** 音声の後の無音時間。 */
@SerializedName("post_phoneme_length")
@Expose
public double postPhonemeLength;
@Expose public double postPhonemeLength;

/** 音声データの出力サンプリングレート。 */
@SerializedName("output_sampling_rate")
@Expose
public int outputSamplingRate;
@Expose public int outputSamplingRate;

/** 音声データをステレオ出力するか否か。 */
@SerializedName("output_stereo")
@Expose
public boolean outputStereo;
@Expose public boolean outputStereo;

/** 句読点などの無音時間。{@code null}のときは無視される。デフォルト値は{@code null}。 */
@SerializedName("pause_length")
@Expose
@Nullable
public Double pauseLength;
@Expose @Nullable public Double pauseLength;

/** 読点などの無音時間(倍率)。デフォルト値は{@code 1.}。 */
@SerializedName("pause_length_scale")
@Expose
public double pauseLengthScale;
@Expose public double pauseLengthScale;

/**
* [読み取り専用] AquesTalk風記法。
*
* <p>{@link jp.hiroshiba.voicevoxcore.blocking.Synthesizer#createAudioQuery} が返すもののみ String
* となる。入力としてのAudioQueryでは無視される。
*/
@SerializedName("kana")
@Expose
@Nullable
public final String kana;
@Expose @Nullable public final String kana;

public AudioQuery() {
this.accentPhrases = new ArrayList<>();
Expand Down
16 changes: 8 additions & 8 deletions crates/voicevox_core_python_api/python/test/test_audio_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@ def test_accept_json_without_optional_fields() -> None:
"accent": 1
}
],
"speed_scale": 1.0,
"pitch_scale": 0.0,
"intonation_scale": 1.0,
"volume_scale": 1.0,
"pre_phoneme_length": 0.1,
"post_phoneme_length": 0.1,
"output_sampling_rate": 24000,
"output_stereo": false
"speedScale": 1.0,
"pitchScale": 0.0,
"intonationScale": 1.0,
"volumeScale": 1.0,
"prePhonemeLength": 0.1,
"postPhonemeLength": 0.1,
"outputSamplingRate": 24000,
"outputStereo": false
}
""",
)
Expand Down
36 changes: 34 additions & 2 deletions crates/voicevox_core_python_api/python/voicevox_core/_models.py
qryxip marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
from typing import NewType
from uuid import UUID

import pydantic
import pydantic.alias_generators
from pydantic import ConfigDict

from ._rust import _to_zenkaku, _validate_pronunciation

Expand Down Expand Up @@ -186,7 +187,17 @@ class AccentPhrase:
"""疑問系かどうか。"""


@pydantic.dataclasses.dataclass
def _rename_audio_query_field(name: str) -> str:
match name:
case "accent_phrases":
return "accent_phrases"
case _:
return pydantic.alias_generators.to_camel(name)
Hiroshiba marked this conversation as resolved.
Show resolved Hide resolved


@pydantic.dataclasses.dataclass(
config=ConfigDict(alias_generator=_rename_audio_query_field),
)
Copy link
Member Author

@qryxip qryxip Jan 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alias - Pydantic

なんか幸いなことに、特に設定しない場合の挙動はエイリアスというよりはリネームっぽい。snake_caseなやつを受け付けなくなった。
(ただしこれはdataclassとしてのフィールド名には影響しないので、そっちの対処はこの下の__post_init__で行う。)

class AudioQuery:
"""AudioQuery (音声合成用のクエリ)。"""

Expand Down Expand Up @@ -231,6 +242,27 @@ class AudioQuery:
のAudioQueryでは無視される。
"""

# `dataclasses.asdict`の内部実装に依存したハックだが、他に方法が思い付かなかった。
Copy link
Member Author

@qryxip qryxip Jan 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

よく考えてみれば別にそんなにハックじゃないような気がしてきた。問題はdataclasses.Field.nameを外から書き換えていいのかといったところか。駄目とは書かれていない。

def __post_init__(self) -> None:
    """
    Patch the ``dataclass`` field names so that :func:`dataclasses.asdict`
    emits the keys under their renamed (serialized) names.
    """

    cls = type(self)
    try:
        # Already patched while initializing an earlier instance.
        cls.__attr_true_names
    except AttributeError:
        # `dataclasses.fields` returns the `Field` objects stored on the class,
        # so renaming them affects every instance. Do it exactly once and keep
        # the renamed -> original mapping on the class as well; rebuilding it
        # per instance would see the already-renamed fields and lose the
        # original attribute names.
        true_names: dict[str, str] = {}
        for field in dataclasses.fields(cls):
            if (rename := _rename_audio_query_field(field.name)) != field.name:
                true_names[rename] = field.name
                field.name = rename
        cls.__attr_true_names = true_names

def __getattr__(self, name: str) -> object:
"""camelCaseの名前に対し、対応するsnake_caseの名前があるならそれについて返す。"""
Copy link
Member Author

@qryxip qryxip Jan 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

この二つをSphinxがそのまま出すため、docstringをちゃんと書いたという流れ。なのでこれらはユーザー向けの説明である。

image

Hiroshiba marked this conversation as resolved.
Show resolved Hide resolved

if true_name := self.__attr_true_names.get(name):
return getattr(self, true_name)
# 普通の`AttributeError`と同じ文面
raise AttributeError(f"{type(self).__name__!r} has no attribute {name!r}")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

多分こういう書き方もあるが、どっちが良いかどうか正直わからない。

Suggested change
if true_name := self.__attr_true_names.get(name):
return getattr(self, true_name)
# 普通の`AttributeError`と同じ文面
raise AttributeError(f"{type(self).__name__!r} has no attribute {name!r}")
try:
true_name = self.__attr_true_names[name]
except KeyError:
# 普通の`AttributeError`と同じ文面
raise AttributeError(f"{type(self).__name__!r} has no attribute {name!r}")
return getattr(self, true_name)



class UserDictWordType(str, Enum):
"""ユーザー辞書の単語の品詞。"""
Expand Down
Loading