Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

mutabilityとasyncnessを仕上げる #553

Merged
merged 21 commits into from
Aug 17, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
5d17802
[wip] Add `gpu_num_sessions` options to `load_voice_model`
qryxip Jul 24, 2023
6eab358
Merge branch 'main' into rework-mutability-and-asyncness
qryxip Jul 29, 2023
ce2f8c0
`decode-with-gpu`を剥がす
qryxip Jul 29, 2023
47a77ee
`gpu_num_sessions`オプションを剥がす
qryxip Jul 29, 2023
ee2dc76
`predict`も別々に`Mutex`に包む
qryxip Jul 29, 2023
cf92b69
`LoadedModels`のdocを書く
qryxip Jul 29, 2023
b893fed
`cargo xtask update-c-header`
qryxip Jul 29, 2023
780d880
`git restore -s main -- crates/voicevox_core_c_api/tests/`
qryxip Jul 29, 2023
1e8886d
Merge branch 'main' into rework-mutability-and-asyncness
qryxip Aug 6, 2023
73344b8
Merge branch 'main' into rework-mutability-and-asyncness
qryxip Aug 11, 2023
c460e48
`synthesizer_get_metas_json` → `synthesizer_create_metas_json`
qryxip Aug 11, 2023
f98ea9f
Merge branch 'main' into rework-mutability-and-asyncness
qryxip Aug 12, 2023
06fef88
#575 で追加されたテストを修正
qryxip Aug 12, 2023
4891a9c
warningを解消
qryxip Aug 12, 2023
e971c2b
`create_metas_json`の返り値を`*mut c_char`に
qryxip Aug 12, 2023
d4eaf55
Merge branch 'main' into rework-mutability-and-asyncness
qryxip Aug 16, 2023
24f5e0f
Rework `ensure_not_contains`
qryxip Aug 16, 2023
e66168e
`LoadedModels`からのsession取得を`get`という単一のメソッドに
qryxip Aug 16, 2023
1101624
`Error::LoadModel`に色々統合する
qryxip Aug 17, 2023
36700d8
欠番を使わない
qryxip Aug 17, 2023
60cad17
`cargo xtask update-c-header`
qryxip Aug 17, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
222 changes: 181 additions & 41 deletions Cargo.lock

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions crates/voicevox_core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ version.workspace = true
edition.workspace = true
publish.workspace = true

[[bench]]
name = "decode-with-gpu"
harness = false

[features]
default = []
directml = ["onnxruntime/directml"]
Expand Down Expand Up @@ -43,6 +47,7 @@ git = "https://github.com/VOICEVOX/open_jtalk-rs.git"
rev = "a16714ce16dec76fd0e3041a7acfa484921db3b5"

[dev-dependencies]
criterion = { version = "0.5.1", features = ["async_tokio"] }
flate2 = "1.0.24"
heck = "0.4.0"
pretty_assertions = "1.3.0"
Expand Down
78 changes: 78 additions & 0 deletions crates/voicevox_core/benches/decode-with-gpu.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
use std::{num::NonZeroU16, sync::Arc};

use criterion::{criterion_group, criterion_main, Criterion};
use test_util::OPEN_JTALK_DIC_DIR;
use tokio::{join, runtime::Runtime};
use voicevox_core::{
AccelerationMode, AudioQueryModel, InitializeOptions, LoadVoiceModelOptions, OpenJtalk,
StyleId, SynthesisOptions, Synthesizer, VoiceModel,
};

// Criterion entry point: `criterion_main!` generates `main` for this bench target, and
// `criterion_group!` declares the `benches` group that runs `benchmark`.
criterion_main!(benches);
criterion_group!(benches, benchmark);

/// Registers two Criterion benchmarks that each run `Synthesizer::synthesis` four times on
/// the audio query prepared by `setup`:
///
/// - `decode_parallel` joins the four futures so they are polled concurrently.
/// - `decode_sequential` awaits them one after another.
///
/// Panics if a Tokio runtime cannot be created, or if setup or synthesis fails.
fn benchmark(c: &mut Criterion) {
    // The runtime used for setup is scoped so that it is dropped before any benchmark runs.
    let prepared = {
        let setup_runtime = Runtime::new().unwrap();
        setup_runtime.block_on(setup()).unwrap()
    };
    let (synthesizer, aq) = &prepared;

    // Every call produces a fresh future performing one synthesis.
    let decode = || async {
        let options = SynthesisOptions {
            enable_interrogative_upspeak: true,
        };
        let result = synthesizer
            .synthesis(aq, StyleId::new(0), &options)
            .await;
        result.unwrap()
    };

    c.bench_function("decode_parallel", |bencher| {
        let runtime = Runtime::new().unwrap();
        bencher.to_async(runtime).iter(|| async {
            let first = decode();
            let second = decode();
            let third = decode();
            let fourth = decode();
            join!(first, second, third, fourth)
        })
    });

    c.bench_function("decode_sequential", |bencher| {
        let runtime = Runtime::new().unwrap();
        bencher.to_async(runtime).iter(|| async {
            for _round in 0..4 {
                decode().await;
            }
        })
    });
}

/// Prepares the fixtures shared by the benchmarks.
///
/// Initializes a `Synthesizer` in GPU acceleration mode with 4 CPU threads, loads
/// `model/sample.vvm` (resolved relative to this crate's manifest directory) with 4 GPU
/// sessions, and creates an audio query for a fixed Japanese sentence with style ID 0.
///
/// # Errors
///
/// Returns the error from synthesizer initialization, model reading, model loading, or
/// audio-query creation.
///
/// # Panics
///
/// Panics if Open JTalk cannot be initialized from `OPEN_JTALK_DIC_DIR`.
async fn setup() -> voicevox_core::Result<(Synthesizer, AudioQueryModel)> {
    let open_jtalk = Arc::new(OpenJtalk::new_with_initialize(OPEN_JTALK_DIC_DIR).unwrap());
    let initialize_options = InitializeOptions {
        acceleration_mode: AccelerationMode::Gpu,
        cpu_num_threads: 4,
        ..Default::default()
    };
    let synthesizer = Synthesizer::new_with_initialize(open_jtalk, &initialize_options).await?;

    let model_path = concat!(env!("CARGO_MANIFEST_DIR"), "/../../model/sample.vvm");
    let model = VoiceModel::from_path(model_path).await?;
    let load_options = LoadVoiceModelOptions {
        gpu_num_sessions: NonZeroU16::new(4).unwrap(),
    };
    synthesizer.load_voice_model(&model, &load_options).await?;

    let text = "寿限無寿限無五劫の擦り切れ海砂利水魚の水行末雲来末";
    let aq = synthesizer
        .audio_query(text, StyleId::new(0), &Default::default())
        .await?;

    Ok((synthesizer, aq))
}
4 changes: 0 additions & 4 deletions crates/voicevox_core/src/engine/synthesis_engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,6 @@ impl SynthesisEngine {
&self.inference_core
}

pub fn inference_core_mut(&mut self) -> &mut InferenceCore {
&mut self.inference_core
}

pub async fn create_accent_phrases(
&self,
text: &str,
Expand Down
85 changes: 40 additions & 45 deletions crates/voicevox_core/src/inference_core.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
use std::num::NonZeroU16;

use self::status::*;
use super::*;
use onnxruntime::{
ndarray,
session::{AnyArray, NdArray},
};
use onnxruntime::{ndarray, session::NdArray};

const PHONEME_LENGTH_MINIMAL: f32 = 0.01;

Expand All @@ -18,11 +17,13 @@ impl InferenceCore {
load_all_models: bool,
) -> Result<Self> {
if !use_gpu || Self::can_support_gpu_feature()? {
let mut status = Status::new(use_gpu, cpu_num_threads);
let status = Status::new(use_gpu, cpu_num_threads);

if load_all_models {
for model in &VoiceModel::get_all_models().await? {
status.load_model(model).await?;
status
.load_model(model, NonZeroU16::new(1).unwrap())
.await?;
}
}
Ok(Self { status })
Expand All @@ -43,14 +44,14 @@ impl InferenceCore {
}
}

pub async fn load_model(&mut self, model: &VoiceModel) -> Result<()> {
self.status.load_model(model).await
pub async fn load_model(&self, model: &VoiceModel, gpu_num_sessions: NonZeroU16) -> Result<()> {
self.status.load_model(model, gpu_num_sessions).await
}

pub fn unload_model(&mut self, voice_model_id: &VoiceModelId) -> Result<()> {
pub fn unload_model(&self, voice_model_id: &VoiceModelId) -> Result<()> {
self.status.unload_model(voice_model_id)
}
pub fn metas(&self) -> &VoiceModelMeta {
pub fn metas(&self) -> VoiceModelMeta {
self.status.metas()
}

Expand All @@ -71,15 +72,13 @@ impl InferenceCore {
return Err(Error::InvalidStyleId { style_id });
}

let mut phoneme_vector_array = NdArray::new(ndarray::arr1(phoneme_vector));
let mut speaker_id_array = NdArray::new(ndarray::arr1(&[style_id.raw_id() as i64]));

let input_tensors: Vec<&mut dyn AnyArray> =
vec![&mut phoneme_vector_array, &mut speaker_id_array];
let phoneme_vector_array = NdArray::new(ndarray::arr1(phoneme_vector));
let speaker_id_array = NdArray::new(ndarray::arr1(&[style_id.raw_id() as i64]));

let mut output = self
.status
.predict_duration_session_run(style_id, input_tensors)?;
.predict_duration_session_run(style_id, phoneme_vector_array, speaker_id_array)
.await?;

for output_item in output.iter_mut() {
if *output_item < PHONEME_LENGTH_MINIMAL {
Expand All @@ -106,31 +105,29 @@ impl InferenceCore {
return Err(Error::InvalidStyleId { style_id });
}

let mut length_array = NdArray::new(ndarray::arr0(length as i64));
let mut vowel_phoneme_vector_array = NdArray::new(ndarray::arr1(vowel_phoneme_vector));
let mut consonant_phoneme_vector_array =
NdArray::new(ndarray::arr1(consonant_phoneme_vector));
let mut start_accent_vector_array = NdArray::new(ndarray::arr1(start_accent_vector));
let mut end_accent_vector_array = NdArray::new(ndarray::arr1(end_accent_vector));
let mut start_accent_phrase_vector_array =
let length_array = NdArray::new(ndarray::arr0(length as i64));
let vowel_phoneme_vector_array = NdArray::new(ndarray::arr1(vowel_phoneme_vector));
let consonant_phoneme_vector_array = NdArray::new(ndarray::arr1(consonant_phoneme_vector));
let start_accent_vector_array = NdArray::new(ndarray::arr1(start_accent_vector));
let end_accent_vector_array = NdArray::new(ndarray::arr1(end_accent_vector));
let start_accent_phrase_vector_array =
NdArray::new(ndarray::arr1(start_accent_phrase_vector));
let mut end_accent_phrase_vector_array =
NdArray::new(ndarray::arr1(end_accent_phrase_vector));
let mut speaker_id_array = NdArray::new(ndarray::arr1(&[style_id.raw_id() as i64]));

let input_tensors: Vec<&mut dyn AnyArray> = vec![
&mut length_array,
&mut vowel_phoneme_vector_array,
&mut consonant_phoneme_vector_array,
&mut start_accent_vector_array,
&mut end_accent_vector_array,
&mut start_accent_phrase_vector_array,
&mut end_accent_phrase_vector_array,
&mut speaker_id_array,
];
let end_accent_phrase_vector_array = NdArray::new(ndarray::arr1(end_accent_phrase_vector));
let speaker_id_array = NdArray::new(ndarray::arr1(&[style_id.raw_id() as i64]));

self.status
.predict_intonation_session_run(style_id, input_tensors)
.predict_intonation_session_run(
style_id,
length_array,
vowel_phoneme_vector_array,
consonant_phoneme_vector_array,
start_accent_vector_array,
end_accent_vector_array,
start_accent_phrase_vector_array,
end_accent_phrase_vector_array,
speaker_id_array,
)
.await
}

pub async fn decode(
Expand Down Expand Up @@ -161,23 +158,21 @@ impl InferenceCore {
padding_size,
);

let mut f0_array = NdArray::new(
let f0_array = NdArray::new(
ndarray::arr1(&f0_with_padding)
.into_shape([length_with_padding, 1])
.unwrap(),
);
let mut phoneme_array = NdArray::new(
let phoneme_array = NdArray::new(
ndarray::arr1(&phoneme_with_padding)
.into_shape([length_with_padding, phoneme_size])
.unwrap(),
);
let mut speaker_id_array = NdArray::new(ndarray::arr1(&[style_id.raw_id() as i64]));

let input_tensors: Vec<&mut dyn AnyArray> =
vec![&mut f0_array, &mut phoneme_array, &mut speaker_id_array];
let speaker_id_array = NdArray::new(ndarray::arr1(&[style_id.raw_id() as i64]));

self.status
.decode_session_run(style_id, input_tensors)
.decode_session_run(style_id, f0_array, phoneme_array, speaker_id_array)
.await
.map(|output| Self::trim_padding_from_output(output, padding_size))
}

Expand Down
Loading