Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: AudioQueryのJSON表現をENGINEと同じにする #946

Merged
merged 11 commits into from
Feb 3, 2025
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ futures-core = "0.3.31"
futures-util = "0.3.31"
futures-lite = "2.3.0"
futures-io = "0.3.31"
heck = "0.4.1"
humansize = "2.1.3"
indexmap = "2.6.0"
indicatif = "0.17.8"
Expand Down
1 change: 0 additions & 1 deletion crates/voicevox_core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ voicevox-ort = { workspace = true, features = ["download-binaries", "__init-for-
voicevox_core_macros.workspace = true

[dev-dependencies]
heck.workspace = true
pollster = { workspace = true, features = ["macro"] }
pretty_assertions.workspace = true
rstest.workspace = true
Expand Down
103 changes: 36 additions & 67 deletions crates/voicevox_core/src/engine/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,32 +55,42 @@ pub struct AudioQuery {
/// アクセント句の配列。
pub accent_phrases: Vec<AccentPhrase>,
/// 全体の話速。
#[serde(rename = "speedScale")]
pub speed_scale: f32,
/// 全体の音高。
#[serde(rename = "pitchScale")]
pub pitch_scale: f32,
/// 全体の抑揚。
#[serde(rename = "intonationScale")]
pub intonation_scale: f32,
/// 全体の音量。
#[serde(rename = "volumeScale")]
pub volume_scale: f32,
/// 音声の前の無音時間。
#[serde(rename = "prePhonemeLength")]
pub pre_phoneme_length: f32,
/// 音声の後の無音時間。
#[serde(rename = "postPhonemeLength")]
pub post_phoneme_length: f32,
/// 音声データの出力サンプリングレート。
#[serde(rename = "outputSamplingRate")]
pub output_sampling_rate: u32,
/// 音声データをステレオ出力するか否か。
#[serde(rename = "outputStereo")]
pub output_stereo: bool,
// TODO: VOICEVOX/voicevox_engine#1308 を実装する
/// 句読点などの無音時間。`null`のときは無視される。デフォルト値は`null`。
#[serde(
default,
rename = "pauseLength",
deserialize_with = "deserialize_pause_length",
serialize_with = "serialize_pause_length"
)]
pub pause_length: (),
/// 読点などの無音時間(倍率)。デフォルト値は`1`。
#[serde(
default,
rename = "pauseLengthScale",
deserialize_with = "deserialize_pause_length_scale",
serialize_with = "serialize_pause_length_scale"
)]
Expand Down Expand Up @@ -183,52 +193,11 @@ impl AudioQuery {

#[cfg(test)]
mod tests {
use pretty_assertions::assert_eq;
use rstest::rstest;
use serde_json::json;

use super::AudioQuery;

#[rstest]
fn check_audio_query_model_json_field_snake_case() {
let audio_query_model = AudioQuery {
accent_phrases: vec![],
speed_scale: 0.0,
pitch_scale: 0.0,
intonation_scale: 0.0,
volume_scale: 0.0,
pre_phoneme_length: 0.0,
post_phoneme_length: 0.0,
output_sampling_rate: 0,
output_stereo: false,
pause_length: (),
pause_length_scale: (),
kana: None,
};
let val = serde_json::to_value(audio_query_model).unwrap();
check_json_field_snake_case(&val);
}

fn check_json_field_snake_case(val: &serde_json::Value) {
use serde_json::Value::*;

match val {
Object(obj) => {
for (k, v) in obj.iter() {
use heck::ToSnakeCase as _;
assert_eq!(k.to_snake_case(), *k, "should be snake case {k}");
check_json_field_snake_case(v);
}
}
Array(array) => {
for val in array.iter() {
check_json_field_snake_case(val);
}
}
_ => {}
}
}

#[rstest]
fn it_accepts_json_without_optional_fields() -> anyhow::Result<()> {
serde_json::from_value::<AudioQuery>(json!({
Expand All @@ -245,14 +214,14 @@ mod tests {
"accent": 1
}
],
"speed_scale": 1.0,
"pitch_scale": 0.0,
"intonation_scale": 1.0,
"volume_scale": 1.0,
"pre_phoneme_length": 0.1,
"post_phoneme_length": 0.1,
"output_sampling_rate": 24000,
"output_stereo": false
"speedScale": 1.0,
"pitchScale": 0.0,
"intonationScale": 1.0,
"volumeScale": 1.0,
"prePhonemeLength": 0.1,
"postPhonemeLength": 0.1,
"outputSamplingRate": 24000,
"outputStereo": false
}))?;
Ok(())
}
Expand All @@ -262,15 +231,15 @@ mod tests {
fn it_denies_non_null_for_pause_length() {
serde_json::from_value::<AudioQuery>(json!({
"accent_phrases": [],
"speed_scale": 1.0,
"pitch_scale": 0.0,
"intonation_scale": 1.0,
"volume_scale": 1.0,
"pre_phoneme_length": 0.1,
"post_phoneme_length": 0.1,
"output_sampling_rate": 24000,
"output_stereo": false,
"pause_length": "aaaaa"
"speedScale": 1.0,
"pitchScale": 0.0,
"intonationScale": 1.0,
"volumeScale": 1.0,
"prePhonemeLength": 0.1,
"postPhonemeLength": 0.1,
"outputSamplingRate": 24000,
"outputStereo": false,
"pauseLength": "aaaaa"
}))
.map(|_| ())
.unwrap_err();
Expand All @@ -281,15 +250,15 @@ mod tests {
fn it_denies_non_float_for_pause_length_scale() {
serde_json::from_value::<AudioQuery>(json!({
"accent_phrases": [],
"speed_scale": 1.0,
"pitch_scale": 0.0,
"intonation_scale": 1.0,
"volume_scale": 1.0,
"pre_phoneme_length": 0.1,
"post_phoneme_length": 0.1,
"output_sampling_rate": 24000,
"output_stereo": false,
"pause_length_scale": "aaaaa",
"speedScale": 1.0,
"pitchScale": 0.0,
"intonationScale": 1.0,
"volumeScale": 1.0,
"prePhonemeLength": 0.1,
"postPhonemeLength": 0.1,
"outputSamplingRate": 24000,
"outputStereo": false,
"pauseLengthScale": "aaaaa",
}))
.map(|_| ())
.unwrap_err();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,66 +16,42 @@ public class AudioQuery {
public List<AccentPhrase> accentPhrases;

/** 全体の話速。 */
@SerializedName("speed_scale")
@Expose
public double speedScale;
@Expose public double speedScale;

/** 全体の音高。 */
@SerializedName("pitch_scale")
@Expose
public double pitchScale;
@Expose public double pitchScale;

/** 全体の抑揚。 */
@SerializedName("intonation_scale")
@Expose
public double intonationScale;
@Expose public double intonationScale;

/** 全体の音量。 */
@SerializedName("volume_scale")
@Expose
public double volumeScale;
@Expose public double volumeScale;

/** 音声の前の無音時間。 */
@SerializedName("pre_phoneme_length")
@Expose
public double prePhonemeLength;
@Expose public double prePhonemeLength;

/** 音声の後の無音時間。 */
@SerializedName("post_phoneme_length")
@Expose
public double postPhonemeLength;
@Expose public double postPhonemeLength;

/** 音声データの出力サンプリングレート。 */
@SerializedName("output_sampling_rate")
@Expose
public int outputSamplingRate;
@Expose public int outputSamplingRate;

/** 音声データをステレオ出力するか否か。 */
@SerializedName("output_stereo")
@Expose
public boolean outputStereo;
@Expose public boolean outputStereo;

/** 句読点などの無音時間。{@code null}のときは無視される。デフォルト値は{@code null}。 */
@SerializedName("pause_length")
@Expose
@Nullable
public Double pauseLength;
@Expose @Nullable public Double pauseLength;

/** 読点などの無音時間(倍率)。デフォルト値は{@code 1.}。 */
@SerializedName("pause_length_scale")
@Expose
public double pauseLengthScale;
@Expose public double pauseLengthScale;

/**
* [読み取り専用] AquesTalk風記法。
*
* <p>{@link jp.hiroshiba.voicevoxcore.blocking.Synthesizer#createAudioQuery} が返すもののみ String
* となる。入力としてのAudioQueryでは無視される。
*/
@SerializedName("kana")
@Expose
@Nullable
public final String kana;
@Expose @Nullable public final String kana;

public AudioQuery() {
this.accentPhrases = new ArrayList<>();
Expand Down
80 changes: 72 additions & 8 deletions crates/voicevox_core_python_api/python/test/test_audio_query.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import dataclasses
import json
import textwrap

import pytest
from voicevox_core import AudioQuery


Expand All @@ -23,16 +25,78 @@ def test_accept_json_without_optional_fields() -> None:
"accent": 1
}
],
"speed_scale": 1.0,
"pitch_scale": 0.0,
"intonation_scale": 1.0,
"volume_scale": 1.0,
"pre_phoneme_length": 0.1,
"post_phoneme_length": 0.1,
"output_sampling_rate": 24000,
"output_stereo": false
"speedScale": 1.0,
"pitchScale": 0.0,
"intonationScale": 1.0,
"volumeScale": 1.0,
"prePhonemeLength": 0.1,
"postPhonemeLength": 0.1,
"outputSamplingRate": 24000,
"outputStereo": false
}
""",
)
)
)


def test_dumps() -> None:
    """Round-trip an ``AudioQuery`` through JSON and require identical text.

    The fixture is parsed with ``json.loads``, expanded into ``AudioQuery``
    keyword arguments, converted back with ``dataclasses.asdict`` and dumped
    with ``indent=2``.  The comparison is on the raw text, so the fixture must
    use the exact layout ``json.dumps(..., indent=2)`` emits: two-space
    indentation for the keys, the same key order, and no trailing newline.
    """
    expected = textwrap.dedent(
        """\
        {
          "accent_phrases": [],
          "speedScale": 1.0,
          "pitchScale": 0.0,
          "intonationScale": 1.0,
          "volumeScale": 1.0,
          "prePhonemeLength": 0.1,
          "postPhonemeLength": 0.1,
          "outputSamplingRate": 24000,
          "outputStereo": false,
          "pauseLength": null,
          "pauseLengthScale": 1.0,
          "kana": ""
        }""",
    )

    query = AudioQuery(**json.loads(expected))
    actual = json.dumps(dataclasses.asdict(query), indent=2)
    assert actual == expected


# This is not a property we are keen to guarantee, but `dataclasses.asdict`
# requires it.
def test_getattr() -> None:
    """Check that the camelCase names are readable through ``getattr``.

    Each camelCase name must resolve to the very same object as the matching
    snake_case attribute, and an unknown name must raise ``AttributeError``
    with the expected message.
    """
    payload = textwrap.dedent(
        """\
        {
          "accent_phrases": [],
          "speedScale": 1.0,
          "pitchScale": 0.0,
          "intonationScale": 1.0,
          "volumeScale": 1.0,
          "prePhonemeLength": 0.1,
          "postPhonemeLength": 0.1,
          "outputSamplingRate": 24000,
          "outputStereo": false
        }
        """,
    )
    query = AudioQuery(**json.loads(payload))

    # camelCase name -> snake_case attribute it must alias.
    aliases = (
        ("speedScale", "speed_scale"),
        ("pitchScale", "pitch_scale"),
        ("intonationScale", "intonation_scale"),
        ("volumeScale", "volume_scale"),
        ("prePhonemeLength", "pre_phoneme_length"),
        ("postPhonemeLength", "post_phoneme_length"),
        ("outputSamplingRate", "output_sampling_rate"),
        ("outputStereo", "output_stereo"),
        ("pauseLength", "pause_length"),
        ("pauseLengthScale", "pause_length_scale"),
    )
    for camel_name, snake_name in aliases:
        assert getattr(query, camel_name) is getattr(query, snake_name)

    with pytest.raises(
        AttributeError, match="^'AudioQuery' has no attribute 'nonexisting_name'$"
    ):
        getattr(query, "nonexisting_name")
Loading
Loading