From 043220688c908d0d1cf3718a78043e67a985b9f5 Mon Sep 17 00:00:00 2001 From: Ryo Yamashita Date: Sat, 14 Sep 2024 19:09:24 +0900 Subject: [PATCH] =?UTF-8?q?change:=20`VoiceModel`=20=E2=86=92=20`VoiceMode?= =?UTF-8?q?lFile`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 2 + crates/voicevox_core/Cargo.toml | 2 +- .../src/__internal/doctest_fixtures.rs | 2 +- crates/voicevox_core/src/asyncs.rs | 147 ++++++++++++--- crates/voicevox_core/src/blocking.rs | 2 +- crates/voicevox_core/src/engine/open_jtalk.rs | 2 +- .../src/infer/runtimes/onnxruntime.rs | 2 +- crates/voicevox_core/src/nonblocking.rs | 2 +- crates/voicevox_core/src/status.rs | 4 +- crates/voicevox_core/src/synthesizer.rs | 31 ++-- crates/voicevox_core/src/test_util.rs | 4 +- crates/voicevox_core/src/user_dict/dict.rs | 2 +- crates/voicevox_core/src/voice_model.rs | 91 ++++++---- .../include/voicevox_core.h | 36 ++-- crates/voicevox_core_c_api/src/c_impls.rs | 11 +- .../src/compatible_engine.rs | 8 +- crates/voicevox_core_c_api/src/lib.rs | 44 ++--- .../tests/e2e/testcases/simple_tts.rs | 4 +- .../testcases/synthesizer_new_output_json.rs | 2 +- .../e2e/testcases/tts_via_audio_query.rs | 4 +- .../tests/e2e/testcases/user_dict_load.rs | 4 +- .../hiroshiba/voicevoxcore/Synthesizer.java | 14 +- .../{VoiceModel.java => VoiceModelFile.java} | 63 +++++-- .../jp/hiroshiba/voicevoxcore/MetaTest.java | 5 +- .../voicevoxcore/SynthesizerTest.java | 45 ++--- .../jp/hiroshiba/voicevoxcore/TestUtils.java | 4 +- .../hiroshiba/voicevoxcore/UserDictTest.java | 11 +- .../voicevoxcore/VoiceModelTest.java | 5 +- .../voicevox_core_java_api/src/synthesizer.rs | 2 +- .../voicevox_core_java_api/src/voice_model.rs | 22 ++- crates/voicevox_core_python_api/Cargo.toml | 2 + .../python/test/test_asyncio_metas.py | 10 +- .../test/test_asyncio_user_dict_load.py | 2 +- .../python/test/test_blocking_metas.py | 10 +- .../test/test_blocking_user_dict_load.py | 2 +- .../python/voicevox_core/_rust/asyncio.pyi | 26 ++- .../python/voicevox_core/_rust/blocking.pyi | 26 ++- .../python/voicevox_core/asyncio.py | 4 +- .../python/voicevox_core/blocking.py | 10 +- crates/voicevox_core_python_api/src/lib.rs | 169 ++++++++++++++---- docs/usage.md | 12 +- example/cpp/unix/simple_tts.cpp | 6 +- example/cpp/windows/simple_tts/simple_tts.cpp | 7 +- example/kotlin/app/src/main/kotlin/app/App.kt | 2 +- example/python/run-asyncio.py | 6 +- example/python/run.py | 6 +- 46 files changed, 596 insertions(+), 281 deletions(-) rename crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/{VoiceModel.java => VoiceModelFile.java} (71%) diff --git a/Cargo.lock b/Cargo.lock index defbbe093..62dc07ffe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4484,8 +4484,10 @@ dependencies = [ name = "voicevox_core_python_api" version = "0.0.0" dependencies = [ + "blocking", "camino", "easy-ext", + "futures-util", "log", "once_cell", "pyo3", diff --git a/crates/voicevox_core/Cargo.toml b/crates/voicevox_core/Cargo.toml index e05b04c79..2af88318b 100644 --- a/crates/voicevox_core/Cargo.toml +++ b/crates/voicevox_core/Cargo.toml @@ -16,7 +16,7 @@ link-onnxruntime = [] [dependencies] anyhow.workspace = true -async-fs.workspace = true +async-fs.workspace = true # 今これを使っている箇所はどこにも無いが、`UserDict`にはこれを使った方がよいはず async_zip = { workspace = true, features = ["deflate"] } blocking.workspace = true camino.workspace = true diff --git a/crates/voicevox_core/src/__internal/doctest_fixtures.rs 
b/crates/voicevox_core/src/__internal/doctest_fixtures.rs index 253bb8d6f..57c28c7a9 100644 --- a/crates/voicevox_core/src/__internal/doctest_fixtures.rs +++ b/crates/voicevox_core/src/__internal/doctest_fixtures.rs @@ -26,7 +26,7 @@ pub async fn synthesizer_with_sample_voice_model( }, )?; - let model = &crate::nonblocking::VoiceModel::from_path(voice_model_path).await?; + let model = &crate::nonblocking::VoiceModelFile::open(voice_model_path).await?; syntesizer.load_voice_model(model).await?; Ok(syntesizer) diff --git a/crates/voicevox_core/src/asyncs.rs b/crates/voicevox_core/src/asyncs.rs index 5f4d7fd21..aca3f2f5c 100644 --- a/crates/voicevox_core/src/asyncs.rs +++ b/crates/voicevox_core/src/asyncs.rs @@ -15,15 +15,24 @@ use std::{ io::{self, Read as _, Seek as _, SeekFrom}, + ops::DerefMut, path::Path, pin::Pin, task::{self, Poll}, }; +use blocking::Unblock; use futures_io::{AsyncRead, AsyncSeek}; +use futures_util::ready; pub(crate) trait Async: 'static { - async fn open_file(path: impl AsRef) -> io::Result; + type Mutex: Mutex; + type RoFile: AsyncRead + AsyncSeek + Send + Sync + Unpin; + async fn open_file_ro(path: impl AsRef) -> io::Result; +} + +pub(crate) trait Mutex: From + Send + Sync + Unpin { + async fn lock(&self) -> impl DerefMut; } /// エグゼキュータが非同期タスクの並行実行をしないことを仮定する、[`Async`]の実装。 @@ -39,30 +48,47 @@ pub(crate) trait Async: 'static { pub(crate) enum SingleTasked {} impl Async for SingleTasked { - async fn open_file(path: impl AsRef) -> io::Result { - return std::fs::File::open(path).map(BlockingFile); - - struct BlockingFile(std::fs::File); - - impl AsyncRead for BlockingFile { - fn poll_read( - mut self: Pin<&mut Self>, - _: &mut task::Context<'_>, - buf: &mut [u8], - ) -> Poll> { - Poll::Ready(self.0.read(buf)) - } - } + type Mutex = futures_util::lock::Mutex; // TODO + type RoFile = StdFile; - impl AsyncSeek for BlockingFile { - fn poll_seek( - mut self: Pin<&mut Self>, - _: &mut task::Context<'_>, - pos: SeekFrom, - ) -> Poll> { - Poll::Ready(self.0.seek(pos)) - } - } + async fn open_file_ro(path: impl AsRef) -> io::Result { + std::fs::File::open(path).map(StdFile) + } +} + +pub(crate) struct StdMutex(std::sync::Mutex); + +impl From for StdMutex { + fn from(inner: T) -> Self { + Self(inner.into()) + } +} + +impl Mutex for StdMutex { + async fn lock(&self) -> impl DerefMut { + self.0.lock().unwrap_or_else(|e| panic!("{e}")) + } +} + +pub(crate) struct StdFile(std::fs::File); + +impl AsyncRead for StdFile { + fn poll_read( + mut self: Pin<&mut Self>, + _: &mut task::Context<'_>, + buf: &mut [u8], + ) -> Poll> { + Poll::Ready(self.0.read(buf)) + } +} + +impl AsyncSeek for StdFile { + fn poll_seek( + mut self: Pin<&mut Self>, + _: &mut task::Context<'_>, + pos: SeekFrom, + ) -> Poll> { + Poll::Ready(self.0.seek(pos)) } } @@ -74,7 +100,76 @@ impl Async for SingleTasked { pub(crate) enum BlockingThreadPool {} impl Async for BlockingThreadPool { - async fn open_file(path: impl AsRef) -> io::Result { - async_fs::File::open(path).await + type Mutex = futures_util::lock::Mutex; + type RoFile = AsyncRoFile; + + async fn open_file_ro(path: impl AsRef) -> io::Result { + AsyncRoFile::open(path).await + } +} + +impl Mutex for futures_util::lock::Mutex { + async fn lock(&self) -> impl DerefMut { + self.lock().await + } +} + +// TODO: `async_fs::File::into_std_file`みたいなのがあればこんなの↓は作らなくていいはず。PR出す? 
+pub(crate) struct AsyncRoFile { + // `poll_read`と`poll_seek`しかしない + unblock: Unblock, + + // async-fsの実装がやっているように「正しい」シーク位置を保持する。ただしファイルはパイプではな + // いことがわかっているため smol-rs/async-fs#4 は考えない + real_seek_pos: Option, +} + +impl AsyncRoFile { + async fn open(path: impl AsRef) -> io::Result { + let path = path.as_ref().to_owned(); + let unblock = Unblock::new(blocking::unblock(|| std::fs::File::open(path)).await?); + Ok(Self { + unblock, + real_seek_pos: None, + }) + } + + pub(crate) async fn close(self) { + let file = self.unblock.into_inner().await; + blocking::unblock(|| drop(file)).await; + } +} + +impl AsyncRead for AsyncRoFile { + fn poll_read( + mut self: Pin<&mut Self>, + cx: &mut task::Context<'_>, + buf: &mut [u8], + ) -> Poll> { + if self.real_seek_pos.is_none() { + self.real_seek_pos = Some(ready!( + Pin::new(&mut self.unblock).poll_seek(cx, SeekFrom::Current(0)) + )?); + } + let n = ready!(Pin::new(&mut self.unblock).poll_read(cx, buf))?; + *self.real_seek_pos.as_mut().expect("should be present") += n as u64; + Poll::Ready(Ok(n)) + } +} + +impl AsyncSeek for AsyncRoFile { + fn poll_seek( + mut self: Pin<&mut Self>, + cx: &mut task::Context<'_>, + pos: SeekFrom, + ) -> Poll> { + // async-fsの実装がやっているような"reposition"を行う。 + // https://github.com/smol-rs/async-fs/issues/2#issuecomment-675595170 + if let Some(real_seek_pos) = self.real_seek_pos { + ready!(Pin::new(&mut self.unblock).poll_seek(cx, SeekFrom::Start(real_seek_pos)))?; + } + self.real_seek_pos = None; + + Pin::new(&mut self.unblock).poll_seek(cx, pos) } } diff --git a/crates/voicevox_core/src/blocking.rs b/crates/voicevox_core/src/blocking.rs index 8d0bc2129..3443e3085 100644 --- a/crates/voicevox_core/src/blocking.rs +++ b/crates/voicevox_core/src/blocking.rs @@ -3,7 +3,7 @@ pub use crate::{ engine::open_jtalk::blocking::OpenJtalk, infer::runtimes::onnxruntime::blocking::Onnxruntime, synthesizer::blocking::Synthesizer, user_dict::dict::blocking::UserDict, - voice_model::blocking::VoiceModel, + voice_model::blocking::VoiceModelFile, }; pub mod onnxruntime { diff --git a/crates/voicevox_core/src/engine/open_jtalk.rs b/crates/voicevox_core/src/engine/open_jtalk.rs index fb7f3ea59..f27e9b0a6 100644 --- a/crates/voicevox_core/src/engine/open_jtalk.rs +++ b/crates/voicevox_core/src/engine/open_jtalk.rs @@ -1,4 +1,4 @@ -// TODO: `VoiceModel`のように、次のような設計にする。 +// TODO: `VoiceModelFile`のように、次のような設計にする。 // // ``` // pub(crate) mod blocking { diff --git a/crates/voicevox_core/src/infer/runtimes/onnxruntime.rs b/crates/voicevox_core/src/infer/runtimes/onnxruntime.rs index 91e435701..f7f92355e 100644 --- a/crates/voicevox_core/src/infer/runtimes/onnxruntime.rs +++ b/crates/voicevox_core/src/infer/runtimes/onnxruntime.rs @@ -1,4 +1,4 @@ -// TODO: `VoiceModel`のように、次のような設計にする。 +// TODO: `VoiceModelFile`のように、次のような設計にする。 // // ``` // pub(crate) mod blocking { diff --git a/crates/voicevox_core/src/nonblocking.rs b/crates/voicevox_core/src/nonblocking.rs index 501a44d04..7187c57fa 100644 --- a/crates/voicevox_core/src/nonblocking.rs +++ b/crates/voicevox_core/src/nonblocking.rs @@ -15,7 +15,7 @@ pub use crate::{ engine::open_jtalk::nonblocking::OpenJtalk, infer::runtimes::onnxruntime::nonblocking::Onnxruntime, synthesizer::nonblocking::Synthesizer, - user_dict::dict::nonblocking::UserDict, voice_model::nonblocking::VoiceModel, + user_dict::dict::nonblocking::UserDict, voice_model::nonblocking::VoiceModelFile, }; pub mod onnxruntime { diff --git a/crates/voicevox_core/src/status.rs b/crates/voicevox_core/src/status.rs index 5103e060e..40e1ae6d2 
100644 --- a/crates/voicevox_core/src/status.rs +++ b/crates/voicevox_core/src/status.rs @@ -408,7 +408,7 @@ mod tests { talk: enum_map!(_ => InferenceSessionOptions::new(0, DeviceSpec::Cpu)), }, ); - let model = &crate::nonblocking::VoiceModel::sample().await.unwrap(); + let model = &crate::nonblocking::VoiceModelFile::sample().await.unwrap(); let model_contents = &model.read_inference_models().await.unwrap(); let result = status.insert_model(model.header(), model_contents); assert_debug_fmt_eq!(Ok(()), result); @@ -424,7 +424,7 @@ mod tests { talk: enum_map!(_ => InferenceSessionOptions::new(0, DeviceSpec::Cpu)), }, ); - let vvm = &crate::nonblocking::VoiceModel::sample().await.unwrap(); + let vvm = &crate::nonblocking::VoiceModelFile::sample().await.unwrap(); let model_header = vvm.header(); let model_contents = &vvm.read_inference_models().await.unwrap(); assert!( diff --git a/crates/voicevox_core/src/synthesizer.rs b/crates/voicevox_core/src/synthesizer.rs index 7a1bb2ab8..045a2d9ea 100644 --- a/crates/voicevox_core/src/synthesizer.rs +++ b/crates/voicevox_core/src/synthesizer.rs @@ -1,4 +1,4 @@ -// TODO: `VoiceModel`のように、次のような設計にする。 +// TODO: `VoiceModelFile`のように、次のような設計にする。 // // ``` // pub(crate) mod blocking { @@ -235,7 +235,7 @@ pub(crate) mod blocking { } /// 音声モデルを読み込む。 - pub fn load_voice_model(&self, model: &crate::blocking::VoiceModel) -> Result<()> { + pub fn load_voice_model(&self, model: &crate::blocking::VoiceModelFile) -> Result<()> { let model_bytes = &model.read_inference_models()?; self.status.insert_model(model.header(), model_bytes) } @@ -1181,7 +1181,10 @@ pub(crate) mod nonblocking { self.0.is_gpu_mode() } - pub async fn load_voice_model(&self, model: &crate::nonblocking::VoiceModel) -> Result<()> { + pub async fn load_voice_model( + &self, + model: &crate::nonblocking::VoiceModelFile, + ) -> Result<()> { let model_bytes = &model.read_inference_models().await?; self.0.status.insert_model(model.header(), model_bytes) } @@ -1351,7 +1354,7 @@ mod tests { .unwrap(); let result = syntesizer - .load_voice_model(&crate::nonblocking::VoiceModel::sample().await.unwrap()) + .load_voice_model(&crate::nonblocking::VoiceModelFile::sample().await.unwrap()) .await; assert_debug_fmt_eq!( @@ -1399,7 +1402,7 @@ mod tests { "expected is_model_loaded to return false, but got true", ); syntesizer - .load_voice_model(&crate::nonblocking::VoiceModel::sample().await.unwrap()) + .load_voice_model(&crate::nonblocking::VoiceModelFile::sample().await.unwrap()) .await .unwrap(); @@ -1427,7 +1430,7 @@ mod tests { .unwrap(); syntesizer - .load_voice_model(&crate::nonblocking::VoiceModel::sample().await.unwrap()) + .load_voice_model(&crate::nonblocking::VoiceModelFile::sample().await.unwrap()) .await .unwrap(); @@ -1460,7 +1463,7 @@ mod tests { ) .unwrap(); syntesizer - .load_voice_model(&crate::nonblocking::VoiceModel::sample().await.unwrap()) + .load_voice_model(&crate::nonblocking::VoiceModelFile::sample().await.unwrap()) .await .unwrap(); @@ -1502,7 +1505,7 @@ mod tests { ) .unwrap(); syntesizer - .load_voice_model(&crate::nonblocking::VoiceModel::sample().await.unwrap()) + .load_voice_model(&crate::nonblocking::VoiceModelFile::sample().await.unwrap()) .await .unwrap(); @@ -1599,7 +1602,7 @@ mod tests { ) .unwrap(); - let model = &crate::nonblocking::VoiceModel::sample().await.unwrap(); + let model = &crate::nonblocking::VoiceModelFile::sample().await.unwrap(); syntesizer.load_voice_model(model).await.unwrap(); let query = match input { @@ -1670,7 +1673,7 @@ mod tests { ) .unwrap(); - let 
model = &crate::nonblocking::VoiceModel::sample().await.unwrap(); + let model = &crate::nonblocking::VoiceModelFile::sample().await.unwrap(); syntesizer.load_voice_model(model).await.unwrap(); let accent_phrases = match input { @@ -1738,7 +1741,7 @@ mod tests { ) .unwrap(); - let model = &crate::nonblocking::VoiceModel::sample().await.unwrap(); + let model = &crate::nonblocking::VoiceModelFile::sample().await.unwrap(); syntesizer.load_voice_model(model).await.unwrap(); let accent_phrases = syntesizer @@ -1801,7 +1804,7 @@ mod tests { ) .unwrap(); - let model = &crate::nonblocking::VoiceModel::sample().await.unwrap(); + let model = &crate::nonblocking::VoiceModelFile::sample().await.unwrap(); syntesizer.load_voice_model(model).await.unwrap(); let accent_phrases = syntesizer @@ -1842,7 +1845,7 @@ mod tests { ) .unwrap(); - let model = &crate::nonblocking::VoiceModel::sample().await.unwrap(); + let model = &crate::nonblocking::VoiceModelFile::sample().await.unwrap(); syntesizer.load_voice_model(model).await.unwrap(); let accent_phrases = syntesizer @@ -1883,7 +1886,7 @@ mod tests { ) .unwrap(); - let model = &crate::nonblocking::VoiceModel::sample().await.unwrap(); + let model = &crate::nonblocking::VoiceModelFile::sample().await.unwrap(); syntesizer.load_voice_model(model).await.unwrap(); let accent_phrases = syntesizer diff --git a/crates/voicevox_core/src/test_util.rs b/crates/voicevox_core/src/test_util.rs index f92c4ee0c..e38f14c5c 100644 --- a/crates/voicevox_core/src/test_util.rs +++ b/crates/voicevox_core/src/test_util.rs @@ -2,8 +2,8 @@ use ::test_util::SAMPLE_VOICE_MODEL_FILE_PATH; use crate::Result; -impl crate::nonblocking::VoiceModel { +impl crate::nonblocking::VoiceModelFile { pub(crate) async fn sample() -> Result { - Self::from_path(SAMPLE_VOICE_MODEL_FILE_PATH).await + Self::open(SAMPLE_VOICE_MODEL_FILE_PATH).await } } diff --git a/crates/voicevox_core/src/user_dict/dict.rs b/crates/voicevox_core/src/user_dict/dict.rs index 13c30540d..0e1c89ca2 100644 --- a/crates/voicevox_core/src/user_dict/dict.rs +++ b/crates/voicevox_core/src/user_dict/dict.rs @@ -1,4 +1,4 @@ -// TODO: `VoiceModel`のように、次のような設計にする。 +// TODO: `VoiceModelFile`のように、次のような設計にする。 // // ``` // pub(crate) mod blocking { diff --git a/crates/voicevox_core/src/voice_model.rs b/crates/voicevox_core/src/voice_model.rs index 48c541439..28d8140ad 100644 --- a/crates/voicevox_core/src/voice_model.rs +++ b/crates/voicevox_core/src/voice_model.rs @@ -3,7 +3,7 @@ //! 
VVM ファイルの定義と形式は[ドキュメント](../../../docs/vvm.md)を参照。 use std::{ - marker::PhantomData, + collections::HashMap, path::{Path, PathBuf}, sync::Arc, }; @@ -20,7 +20,7 @@ use serde::Deserialize; use uuid::Uuid; use crate::{ - asyncs::Async, + asyncs::{Async, Mutex as _}, error::{LoadModelError, LoadModelErrorKind, LoadModelResult}, infer::{ domains::{InferenceDomainMap, TalkDomain, TalkOperation}, @@ -61,20 +61,18 @@ impl VoiceModelId { } #[self_referencing] -struct Inner { +struct Inner { header: VoiceModelHeader, #[borrows(header)] #[not_covariant] inference_model_entries: InferenceDomainMap>, - // `_marker`とすると、`borrow__marker`のような名前のメソッドが生成されて`non_snake_case`が - // 起動してしまう - marker: PhantomData A>, + zip: A::Mutex, } impl Inner { - async fn from_path(path: impl AsRef) -> crate::Result { + async fn open(path: impl AsRef) -> crate::Result { const MANIFEST_FILENAME: &str = "manifest.json"; let path = path.as_ref(); @@ -89,8 +87,16 @@ impl Inner { .await .map_err(|source| error(LoadModelErrorKind::OpenZipFile, source))?; + let indices = zip.entry_indices_by_utf8_filenames(); + let find_entry_index = |filename: &str| { + indices + .get(filename) + .with_context(|| "could not find `{filename}`") + .copied() + }; + let manifest = &async { - let idx = zip.find_entry_index(MANIFEST_FILENAME)?; + let idx = find_entry_index(MANIFEST_FILENAME)?; zip.read_file(idx).await } .await @@ -106,7 +112,7 @@ impl Inner { .map_err(|source| error(LoadModelErrorKind::InvalidModelFormat, source.into()))?; let metas = &async { - let idx = zip.find_entry_index(manifest.metas_filename())?; + let idx = find_entry_index(manifest.metas_filename())?; zip.read_file(idx).await } .await @@ -133,13 +139,13 @@ impl Inner { .map(|manifest| { let indices = enum_map! { TalkOperation::PredictDuration => { - zip.find_entry_index(&manifest.predict_duration_filename)? + find_entry_index(&manifest.predict_duration_filename)? + } + TalkOperation::PredictIntonation => { + find_entry_index(&manifest.predict_intonation_filename)? } - TalkOperation::PredictIntonation => zip.find_entry_index( - &manifest.predict_intonation_filename, - )?, TalkOperation::Decode => { - zip.find_entry_index(&manifest.decode_filename)? + find_entry_index(&manifest.decode_filename)? } }; @@ -159,7 +165,7 @@ impl Inner { .collect() .map_err(crate::Error::from) }, - marker: PhantomData, + zip: zip.into_inner().into_inner().into(), } .try_build() } @@ -187,9 +193,11 @@ impl Inner { source: Some(source), }; - let mut zip = A::open_zip(path) + let zip = &mut *self.borrow_zip().lock().await; + let zip = futures_util::io::BufReader::new(zip); + let mut zip = async_zip::base::read::seek::ZipFileReader::new(zip) .await - .map_err(|source| error(LoadModelErrorKind::OpenZipFile, source))?; + .map_err(|source| error(LoadModelErrorKind::OpenZipFile, source.into()))?; macro_rules! read_file { ($entry:expr $(,)?) 
=> {{ @@ -255,9 +263,10 @@ struct InferenceModelEntry { impl A { async fn open_zip( path: &Path, - ) -> anyhow::Result> - { - let zip = Self::open_file(path).await.with_context(|| { + ) -> anyhow::Result< + async_zip::base::read::seek::ZipFileReader>, + > { + let zip = Self::open_file_ro(path).await.with_context(|| { // fs-errのと同じにする format!("failed to open file `{}`", path.display()) })?; @@ -269,15 +278,13 @@ impl A { #[ext] impl async_zip::base::read::seek::ZipFileReader { - fn find_entry_index(&self, filename: &str) -> anyhow::Result { - let (idx, _) = self - .file() + fn entry_indices_by_utf8_filenames(&self) -> HashMap { + self.file() .entries() .iter() .enumerate() - .find(|(_, e)| e.filename().as_str().ok() == Some(filename)) - .with_context(|| "could not find `{filename}`")?; - Ok(idx) + .flat_map(|(i, e)| e.filename().as_str().map(|s| (s.to_owned(), i))) + .collect() } async fn read_file(&mut self, index: usize) -> anyhow::Result> { @@ -394,21 +401,21 @@ pub(crate) mod blocking { use super::{Inner, ModelBytesWithInnerVoiceIdsByDomain, VoiceModelHeader, VoiceModelId}; - /// 音声モデル。 + /// 音声モデルファイル。 /// /// VVMファイルと対応する。 - pub struct VoiceModel(Inner); + pub struct VoiceModelFile(Inner); - impl self::VoiceModel { + impl self::VoiceModelFile { pub(crate) fn read_inference_models( &self, ) -> LoadModelResult> { self.0.read_inference_models().block_on() } - /// VVMファイルから`VoiceModel`をコンストラクトする。 - pub fn from_path(path: impl AsRef) -> crate::Result { - Inner::from_path(path).block_on().map(Self) + /// VVMファイルを開く。 + pub fn open(path: impl AsRef) -> crate::Result { + Inner::open(path).block_on().map(Self) } /// ID。 @@ -427,7 +434,7 @@ pub(crate) mod blocking { } #[ext(IdRef)] - pub impl VoiceModel { + pub impl VoiceModelFile { fn id_ref(&self) -> &Uuid { &self.header().manifest.id.0 } @@ -444,7 +451,7 @@ pub(crate) mod nonblocking { use super::{Inner, ModelBytesWithInnerVoiceIdsByDomain, VoiceModelHeader, VoiceModelId}; - /// 音声モデル。 + /// 音声モデルファイル。 /// /// VVMファイルと対応する。 /// @@ -454,17 +461,23 @@ pub(crate) mod nonblocking { /// /// [blocking]: https://docs.rs/crate/blocking /// [`nonblocking`モジュールのドキュメント]: crate::nonblocking - pub struct VoiceModel(Inner); + pub struct VoiceModelFile(Inner); - impl self::VoiceModel { + impl self::VoiceModelFile { pub(crate) async fn read_inference_models( &self, ) -> LoadModelResult> { self.0.read_inference_models().await } - /// VVMファイルから`VoiceModel`をコンストラクトする。 - pub async fn from_path(path: impl AsRef) -> Result { - Inner::from_path(path).await.map(Self) + + /// VVMファイルを開く。 + pub async fn open(path: impl AsRef) -> Result { + Inner::open(path).await.map(Self) + } + + /// VVMファイルを閉じる。 + pub async fn close(self) { + self.0.into_heads().zip.into_inner().close().await; } /// ID。 diff --git a/crates/voicevox_core_c_api/include/voicevox_core.h b/crates/voicevox_core_c_api/include/voicevox_core.h index 422f32978..e158ebb6a 100644 --- a/crates/voicevox_core_c_api/include/voicevox_core.h +++ b/crates/voicevox_core_c_api/include/voicevox_core.h @@ -295,12 +295,12 @@ typedef struct VoicevoxSynthesizer VoicevoxSynthesizer; typedef struct VoicevoxUserDict VoicevoxUserDict; /** - * 音声モデル。 + * 音声モデルファイル。 * * VVMファイルと対応する。 - * 構築(_construction_)は ::voicevox_voice_model_new_from_path で行い、破棄(_destruction_)は ::voicevox_voice_model_delete で行う。 + * 構築(_construction_)は ::voicevox_voice_model_file_open で行い、破棄(_destruction_)は ::voicevox_voice_model_file_close で行う。 */ -typedef struct VoicevoxVoiceModel VoicevoxVoiceModel; +typedef struct VoicevoxVoiceModelFile 
VoicevoxVoiceModelFile; #if defined(VOICEVOX_LOAD_ONNXRUNTIME) /** @@ -593,7 +593,7 @@ __declspec(dllimport) const char *voicevox_get_version(void); /** - * VVMファイルから ::VoicevoxVoiceModel を構築(_construct_)する。 + * VVMファイルを開く。 * * @param [in] path vvmファイルへのUTF-8のファイルパス * @param [out] out_model 構築先 @@ -608,56 +608,56 @@ const char *voicevox_get_version(void); #ifdef _WIN32 __declspec(dllimport) #endif -VoicevoxResultCode voicevox_voice_model_new_from_path(const char *path, - struct VoicevoxVoiceModel **out_model); +VoicevoxResultCode voicevox_voice_model_file_open(const char *path, + struct VoicevoxVoiceModelFile **out_model); /** - * ::VoicevoxVoiceModel からIDを取得する。 + * ::VoicevoxVoiceModelFile からIDを取得する。 * * @param [in] model 音声モデル * * @returns 音声モデルID * * \safety{ - * - `model`は ::voicevox_voice_model_new_from_path で得たものでなければならず、また ::voicevox_voice_model_delete で解放されていてはいけない。 + * - `model`は ::voicevox_voice_model_file_open で得たものでなければならず、また ::voicevox_voice_model_file_close で解放されていてはいけない。 * } */ #ifdef _WIN32 __declspec(dllimport) #endif -VoicevoxVoiceModelId voicevox_voice_model_id(const struct VoicevoxVoiceModel *model); +VoicevoxVoiceModelId voicevox_voice_model_file_id(const struct VoicevoxVoiceModelFile *model); /** - * ::VoicevoxVoiceModel からメタ情報を取得する。 + * ::VoicevoxVoiceModelFile からメタ情報を取得する。 * * @param [in] model 音声モデル * * @returns メタ情報のJSON文字列 * * \safety{ - * - `model`は ::voicevox_voice_model_new_from_path で得たものでなければならず、また ::voicevox_voice_model_delete で解放されていてはいけない。 + * - `model`は ::voicevox_voice_model_file_open で得たものでなければならず、また ::voicevox_voice_model_file_close で解放されていてはいけない。 * - 戻り値の文字列の生存期間(_lifetime_)は次にこの関数が呼ばれるか、`model`が破棄されるまでである。この生存期間を越えて文字列にアクセスしてはならない。 * } */ #ifdef _WIN32 __declspec(dllimport) #endif -const char *voicevox_voice_model_get_metas_json(const struct VoicevoxVoiceModel *model); +const char *voicevox_voice_model_file_get_metas_json(const struct VoicevoxVoiceModelFile *model); /** - * ::VoicevoxVoiceModel を破棄(_destruct_)する。 + * ::VoicevoxVoiceModelFile を、所有しているファイルディスクリプタを閉じた上で破棄(_destruct_)する。 * * @param [in] model 破棄対象 * * \safety{ - * - `model`は ::voicevox_voice_model_new_from_path で得たものでなければならず、また既にこの関数で解放されていてはいけない。 + * - `model`は ::voicevox_voice_model_file_open で得たものでなければならず、また既にこの関数で解放されていてはいけない。 * - `model`は以後ダングリングポインタ(_dangling pointer_)として扱われなくてはならない。 * } */ #ifdef _WIN32 __declspec(dllimport) #endif -void voicevox_voice_model_delete(struct VoicevoxVoiceModel *model); +void voicevox_voice_model_file_close(struct VoicevoxVoiceModelFile *model); /** * ::VoicevoxSynthesizer を構築(_construct_)する。 @@ -671,7 +671,7 @@ void voicevox_voice_model_delete(struct VoicevoxVoiceModel *model); * * \safety{ * - `onnxruntime`は ::voicevox_onnxruntime_load_once または ::voicevox_onnxruntime_init_once で得たものでなければならない。 - * - `open_jtalk`は ::voicevox_voice_model_new_from_path で得たものでなければならず、また ::voicevox_open_jtalk_rc_new で解放されていてはいけない。 + * - `open_jtalk`は ::voicevox_voice_model_file_open で得たものでなければならず、また ::voicevox_open_jtalk_rc_new で解放されていてはいけない。 * - `out_synthesizer`は書き込みについて有効でなければならない。 * } */ @@ -708,14 +708,14 @@ void voicevox_synthesizer_delete(struct VoicevoxSynthesizer *synthesizer); * * \safety{ * - `synthesizer`は ::voicevox_synthesizer_new で得たものでなければならず、また ::voicevox_synthesizer_delete で解放されていてはいけない。 - * - `model`は ::voicevox_voice_model_new_from_path で得たものでなければならず、また ::voicevox_voice_model_delete で解放されていてはいけない。 + * - `model`は ::voicevox_voice_model_file_open で得たものでなければならず、また ::voicevox_voice_model_file_close で解放されていてはいけない。 * } */ #ifdef _WIN32 __declspec(dllimport) 
#endif VoicevoxResultCode voicevox_synthesizer_load_voice_model(const struct VoicevoxSynthesizer *synthesizer, - const struct VoicevoxVoiceModel *model); + const struct VoicevoxVoiceModelFile *model); /** * 音声モデルの読み込みを解除する。 diff --git a/crates/voicevox_core_c_api/src/c_impls.rs b/crates/voicevox_core_c_api/src/c_impls.rs index 1adc402cd..0e9ff9a78 100644 --- a/crates/voicevox_core_c_api/src/c_impls.rs +++ b/crates/voicevox_core_c_api/src/c_impls.rs @@ -5,7 +5,8 @@ use ref_cast::ref_cast_custom; use voicevox_core::{InitializeOptions, Result, VoiceModelId}; use crate::{ - helpers::CApiResult, OpenJtalkRc, VoicevoxOnnxruntime, VoicevoxSynthesizer, VoicevoxVoiceModel, + helpers::CApiResult, OpenJtalkRc, VoicevoxOnnxruntime, VoicevoxSynthesizer, + VoicevoxVoiceModelFile, }; // FIXME: 中身(Rust API)を直接操作するかラッパーメソッド越しにするのかが混在していて、一貫性を @@ -87,7 +88,7 @@ impl VoicevoxSynthesizer { pub(crate) fn load_voice_model( &self, - model: &voicevox_core::blocking::VoiceModel, + model: &voicevox_core::blocking::VoiceModelFile, ) -> CApiResult<()> { self.synthesizer.load_voice_model(model)?; Ok(()) @@ -104,9 +105,9 @@ impl VoicevoxSynthesizer { } } -impl VoicevoxVoiceModel { - pub(crate) fn from_path(path: impl AsRef) -> Result { - let model = voicevox_core::blocking::VoiceModel::from_path(path)?; +impl VoicevoxVoiceModelFile { + pub(crate) fn open(path: impl AsRef) -> Result { + let model = voicevox_core::blocking::VoiceModelFile::open(path)?; let metas = CString::new(serde_json::to_string(model.metas()).unwrap()).unwrap(); Ok(Self { model, metas }) } diff --git a/crates/voicevox_core_c_api/src/compatible_engine.rs b/crates/voicevox_core_c_api/src/compatible_engine.rs index 9fdff0c92..7b1a03e7e 100644 --- a/crates/voicevox_core_c_api/src/compatible_engine.rs +++ b/crates/voicevox_core_c_api/src/compatible_engine.rs @@ -35,10 +35,10 @@ static ONNXRUNTIME: LazyLock<&'static voicevox_core::blocking::Onnxruntime> = La }); struct VoiceModelSet { - all_vvms: Vec>, + all_vvms: Vec>, all_metas_json: CString, style_model_map: BTreeMap, - model_map: BTreeMap>, + model_map: BTreeMap>, } static VOICE_MODEL_SET: LazyLock = LazyLock::new(|| { @@ -66,7 +66,7 @@ static VOICE_MODEL_SET: LazyLock = LazyLock::new(|| { /// # Panics /// /// 失敗したらパニックする - fn get_all_models() -> Vec> { + fn get_all_models() -> Vec> { let root_dir = if let Some(root_dir) = env::var_os(ROOT_DIR_ENV_NAME) { root_dir.into() } else { @@ -84,7 +84,7 @@ static VOICE_MODEL_SET: LazyLock = LazyLock::new(|| { .unwrap_or_else(|e| panic!("{}が読めませんでした: {e}", root_dir.display())) .into_iter() .filter(|entry| entry.path().extension().map_or(false, |ext| ext == "vvm")) - .map(|entry| voicevox_core::blocking::VoiceModel::from_path(entry.path()).map(Arc::new)) + .map(|entry| voicevox_core::blocking::VoiceModelFile::open(entry.path()).map(Arc::new)) .collect::>() .unwrap() } diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs index 161af38e9..63f344553 100644 --- a/crates/voicevox_core_c_api/src/lib.rs +++ b/crates/voicevox_core_c_api/src/lib.rs @@ -399,13 +399,13 @@ pub extern "C" fn voicevox_get_version() -> *const c_char { }; } -/// 音声モデル。 +/// 音声モデルファイル。 /// /// VVMファイルと対応する。 -/// 構築(_construction_)は ::voicevox_voice_model_new_from_path で行い、破棄(_destruction_)は ::voicevox_voice_model_delete で行う。 +/// 構築(_construction_)は ::voicevox_voice_model_file_open で行い、破棄(_destruction_)は ::voicevox_voice_model_file_close で行う。 #[derive(Getters)] -pub struct VoicevoxVoiceModel { - model: voicevox_core::blocking::VoiceModel, +pub struct 
VoicevoxVoiceModelFile { + model: voicevox_core::blocking::VoiceModelFile, metas: CString, } @@ -417,7 +417,7 @@ pub type VoicevoxVoiceModelId<'a> = &'a [u8; 16]; /// VOICEVOXにおける、ある話者(_speaker_)のあるスタイル(_style_)を指す。 pub type VoicevoxStyleId = u32; -/// VVMファイルから ::VoicevoxVoiceModel を構築(_construct_)する。 +/// VVMファイルを開く。 /// /// @param [in] path vvmファイルへのUTF-8のファイルパス /// @param [out] out_model 構築先 @@ -429,60 +429,64 @@ pub type VoicevoxStyleId = u32; /// - `out_model`は書き込みについて有効でなければならない。 /// } #[no_mangle] -pub unsafe extern "C" fn voicevox_voice_model_new_from_path( +pub unsafe extern "C" fn voicevox_voice_model_file_open( path: *const c_char, - out_model: NonNull>, + out_model: NonNull>, ) -> VoicevoxResultCode { init_logger_once(); into_result_code_with_error((|| { let path = ensure_utf8(CStr::from_ptr(path))?; - let model = VoicevoxVoiceModel::from_path(path)?.into(); + let model = VoicevoxVoiceModelFile::open(path)?.into(); out_model.write_unaligned(model); Ok(()) })()) } -/// ::VoicevoxVoiceModel からIDを取得する。 +/// ::VoicevoxVoiceModelFile からIDを取得する。 /// /// @param [in] model 音声モデル /// /// @returns 音声モデルID /// /// \safety{ -/// - `model`は ::voicevox_voice_model_new_from_path で得たものでなければならず、また ::voicevox_voice_model_delete で解放されていてはいけない。 +/// - `model`は ::voicevox_voice_model_file_open で得たものでなければならず、また ::voicevox_voice_model_file_close で解放されていてはいけない。 /// } #[no_mangle] -pub extern "C" fn voicevox_voice_model_id(model: &VoicevoxVoiceModel) -> VoicevoxVoiceModelId<'_> { +pub extern "C" fn voicevox_voice_model_file_id( + model: &VoicevoxVoiceModelFile, +) -> VoicevoxVoiceModelId<'_> { init_logger_once(); model.model.id_ref().as_bytes() } -/// ::VoicevoxVoiceModel からメタ情報を取得する。 +/// ::VoicevoxVoiceModelFile からメタ情報を取得する。 /// /// @param [in] model 音声モデル /// /// @returns メタ情報のJSON文字列 /// /// \safety{ -/// - `model`は ::voicevox_voice_model_new_from_path で得たものでなければならず、また ::voicevox_voice_model_delete で解放されていてはいけない。 +/// - `model`は ::voicevox_voice_model_file_open で得たものでなければならず、また ::voicevox_voice_model_file_close で解放されていてはいけない。 /// - 戻り値の文字列の生存期間(_lifetime_)は次にこの関数が呼ばれるか、`model`が破棄されるまでである。この生存期間を越えて文字列にアクセスしてはならない。 /// } #[no_mangle] -pub extern "C" fn voicevox_voice_model_get_metas_json(model: &VoicevoxVoiceModel) -> *const c_char { +pub extern "C" fn voicevox_voice_model_file_get_metas_json( + model: &VoicevoxVoiceModelFile, +) -> *const c_char { init_logger_once(); model.metas().as_ptr() } -/// ::VoicevoxVoiceModel を破棄(_destruct_)する。 +/// ::VoicevoxVoiceModelFile を、所有しているファイルディスクリプタを閉じた上で破棄(_destruct_)する。 /// /// @param [in] model 破棄対象 /// /// \safety{ -/// - `model`は ::voicevox_voice_model_new_from_path で得たものでなければならず、また既にこの関数で解放されていてはいけない。 +/// - `model`は ::voicevox_voice_model_file_open で得たものでなければならず、また既にこの関数で解放されていてはいけない。 /// - `model`は以後ダングリングポインタ(_dangling pointer_)として扱われなくてはならない。 /// } #[no_mangle] -pub extern "C" fn voicevox_voice_model_delete(model: Box) { +pub extern "C" fn voicevox_voice_model_file_close(model: Box) { init_logger_once(); drop(model); } @@ -506,7 +510,7 @@ pub struct VoicevoxSynthesizer { /// /// \safety{ /// - `onnxruntime`は ::voicevox_onnxruntime_load_once または ::voicevox_onnxruntime_init_once で得たものでなければならない。 -/// - `open_jtalk`は ::voicevox_voice_model_new_from_path で得たものでなければならず、また ::voicevox_open_jtalk_rc_new で解放されていてはいけない。 +/// - `open_jtalk`は ::voicevox_voice_model_file_open で得たものでなければならず、また ::voicevox_open_jtalk_rc_new で解放されていてはいけない。 /// - `out_synthesizer`は書き込みについて有効でなければならない。 /// } #[no_mangle] @@ -549,12 +553,12 @@ pub extern "C" fn 
voicevox_synthesizer_delete(synthesizer: Box VoicevoxResultCode { init_logger_once(); into_result_code_with_error(synthesizer.load_voice_model(model.model())) diff --git a/crates/voicevox_core_c_api/tests/e2e/testcases/simple_tts.rs b/crates/voicevox_core_c_api/tests/e2e/testcases/simple_tts.rs index a4381f74d..1997d30e9 100644 --- a/crates/voicevox_core_c_api/tests/e2e/testcases/simple_tts.rs +++ b/crates/voicevox_core_c_api/tests/e2e/testcases/simple_tts.rs @@ -29,7 +29,7 @@ impl assert_cdylib::TestCase for TestCase { let model = { let mut model = MaybeUninit::uninit(); - assert_ok(lib.voicevox_voice_model_new_from_path( + assert_ok(lib.voicevox_voice_model_file_open( c_api::SAMPLE_VOICE_MODEL_FILE_PATH.as_ptr(), model.as_mut_ptr(), )); @@ -88,7 +88,7 @@ impl assert_cdylib::TestCase for TestCase { std::assert_eq!(SNAPSHOTS.output[&self.text].wav_length, wav_length); - lib.voicevox_voice_model_delete(model); + lib.voicevox_voice_model_file_close(model); lib.voicevox_open_jtalk_rc_delete(openjtalk); lib.voicevox_synthesizer_delete(synthesizer); lib.voicevox_wav_free(wav); diff --git a/crates/voicevox_core_c_api/tests/e2e/testcases/synthesizer_new_output_json.rs b/crates/voicevox_core_c_api/tests/e2e/testcases/synthesizer_new_output_json.rs index 9ac8f4b35..ac662d06e 100644 --- a/crates/voicevox_core_c_api/tests/e2e/testcases/synthesizer_new_output_json.rs +++ b/crates/voicevox_core_c_api/tests/e2e/testcases/synthesizer_new_output_json.rs @@ -63,7 +63,7 @@ impl assert_cdylib::TestCase for TestCase { let model = { let mut model = MaybeUninit::uninit(); - assert_ok(lib.voicevox_voice_model_new_from_path( + assert_ok(lib.voicevox_voice_model_file_open( c_api::SAMPLE_VOICE_MODEL_FILE_PATH.as_ptr(), model.as_mut_ptr(), )); diff --git a/crates/voicevox_core_c_api/tests/e2e/testcases/tts_via_audio_query.rs b/crates/voicevox_core_c_api/tests/e2e/testcases/tts_via_audio_query.rs index 0f2ff5fc8..2536a73d3 100644 --- a/crates/voicevox_core_c_api/tests/e2e/testcases/tts_via_audio_query.rs +++ b/crates/voicevox_core_c_api/tests/e2e/testcases/tts_via_audio_query.rs @@ -29,7 +29,7 @@ impl assert_cdylib::TestCase for TestCase { let model = { let mut model = MaybeUninit::uninit(); - assert_ok(lib.voicevox_voice_model_new_from_path( + assert_ok(lib.voicevox_voice_model_file_open( c_api::SAMPLE_VOICE_MODEL_FILE_PATH.as_ptr(), model.as_mut_ptr(), )); @@ -99,7 +99,7 @@ impl assert_cdylib::TestCase for TestCase { std::assert_eq!(SNAPSHOTS.output[&self.text].wav_length, wav_length); - lib.voicevox_voice_model_delete(model); + lib.voicevox_voice_model_file_close(model); lib.voicevox_open_jtalk_rc_delete(openjtalk); lib.voicevox_synthesizer_delete(synthesizer); lib.voicevox_json_free(audio_query); diff --git a/crates/voicevox_core_c_api/tests/e2e/testcases/user_dict_load.rs b/crates/voicevox_core_c_api/tests/e2e/testcases/user_dict_load.rs index 64e062251..d044962ae 100644 --- a/crates/voicevox_core_c_api/tests/e2e/testcases/user_dict_load.rs +++ b/crates/voicevox_core_c_api/tests/e2e/testcases/user_dict_load.rs @@ -47,7 +47,7 @@ impl assert_cdylib::TestCase for TestCase { let model = { let mut model = MaybeUninit::uninit(); - assert_ok(lib.voicevox_voice_model_new_from_path( + assert_ok(lib.voicevox_voice_model_file_open( c_api::SAMPLE_VOICE_MODEL_FILE_PATH.as_ptr(), model.as_mut_ptr(), )); @@ -119,7 +119,7 @@ impl assert_cdylib::TestCase for TestCase { audio_query_with_dict.get("kana") ); - lib.voicevox_voice_model_delete(model); + lib.voicevox_voice_model_file_close(model); 
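For reference, a minimal sketch of the renamed blocking Rust API that the C functions exercised in these tests wrap, using only names that appear in this patch (`VoiceModelFile::open`, `metas`, and loading by reference into a synthesizer). The file path and the `anyhow`/`serde_json` glue are placeholders, not part of the change.

```rust
// Sketch only — assumes a VVM file at a placeholder path.
use voicevox_core::blocking::VoiceModelFile;

fn main() -> anyhow::Result<()> {
    // Formerly `VoiceModel::from_path`; the handle now represents an open VVM file.
    let model = VoiceModelFile::open("sample.vvm")?;

    // Metas are read when the file is opened and stay available on the handle.
    println!("{}", serde_json::to_string(model.metas())?);

    // A synthesizer would take the handle by reference:
    //     synthesizer.load_voice_model(&model)?;
    // Dropping the handle releases the underlying file descriptor.
    drop(model);
    Ok(())
}
```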
lib.voicevox_open_jtalk_rc_delete(openjtalk); lib.voicevox_synthesizer_delete(synthesizer); lib.voicevox_user_dict_delete(dict); diff --git a/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/Synthesizer.java b/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/Synthesizer.java index 2fac70741..8ac62b9a5 100644 --- a/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/Synthesizer.java +++ b/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/Synthesizer.java @@ -54,10 +54,11 @@ public boolean isGpuMode() { * @return メタ情報。 */ @Nonnull - public VoiceModel.SpeakerMeta[] metas() { + public VoiceModelFile.SpeakerMeta[] metas() { Gson gson = new Gson(); String metasJson = rsGetMetasJson(); - VoiceModel.SpeakerMeta[] rawMetas = gson.fromJson(metasJson, VoiceModel.SpeakerMeta[].class); + VoiceModelFile.SpeakerMeta[] rawMetas = + gson.fromJson(metasJson, VoiceModelFile.SpeakerMeta[].class); if (rawMetas == null) { throw new NullPointerException("metas"); } @@ -70,8 +71,10 @@ public VoiceModel.SpeakerMeta[] metas() { * @param voiceModel 読み込むモデル。 * @throws InvalidModelDataException 無効なモデルデータの場合。 */ - public void loadVoiceModel(VoiceModel voiceModel) throws InvalidModelDataException { - rsLoadVoiceModel(voiceModel); + public void loadVoiceModel(VoiceModelFile voiceModel) throws InvalidModelDataException { + synchronized (voiceModel) { + rsLoadVoiceModel(voiceModel.opened()); + } } /** @@ -284,7 +287,8 @@ public TtsConfigurator tts(String text, int styleId) { @Nonnull private native String rsGetMetasJson(); - private native void rsLoadVoiceModel(VoiceModel voiceModel) throws InvalidModelDataException; + private native void rsLoadVoiceModel(VoiceModelFile.Opened voiceModel) + throws InvalidModelDataException; private native void rsUnloadVoiceModel(UUID voiceModelId); diff --git a/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/VoiceModel.java b/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/VoiceModelFile.java similarity index 71% rename from crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/VoiceModel.java rename to crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/VoiceModelFile.java index d8c002f0f..2a761f8d9 100644 --- a/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/VoiceModel.java +++ b/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/VoiceModelFile.java @@ -5,22 +5,24 @@ import com.google.gson.annotations.SerializedName; import jakarta.annotation.Nonnull; import jakarta.annotation.Nullable; +import java.io.Closeable; +import java.util.Optional; import java.util.UUID; -/** 音声モデル。 */ -public class VoiceModel extends Dll { - private long handle; - +/** 音声モデルファイル。 */ +public class VoiceModelFile extends Dll implements Closeable { /** ID。 */ @Nonnull public final UUID id; /** メタ情報。 */ @Nonnull public final SpeakerMeta[] metas; - public VoiceModel(String modelPath) { - rsFromPath(modelPath); - id = rsGetId(); - String metasJson = rsGetMetasJson(); + @Nullable private Opened inner; + + public VoiceModelFile(String modelPath) { + inner = new Opened(modelPath); + id = inner.rsGetId(); + String metasJson = inner.rsGetMetasJson(); Gson gson = new Gson(); SpeakerMeta[] rawMetas = gson.fromJson(metasJson, SpeakerMeta[].class); if (rawMetas == null) { @@ -29,20 +31,47 @@ public VoiceModel(String modelPath) { metas = rawMetas; } - protected void finalize() throws 
Throwable { - rsDrop(); - super.finalize(); + Opened opened() { + if (inner == null) { + throw new IllegalStateException("this `VoiceModelFile` is closed"); + } + return inner; } - private native void rsFromPath(String modelPath); + @Override + public synchronized void close() { + Optional inner = Optional.ofNullable(this.inner); + this.inner = null; + if (inner.isPresent()) { + inner.get().rsDrop(); + } + } + + static class Opened { + private long handle; + + private Opened(String modelPath) { + rsOpen(modelPath); + } + + @Override + protected void finalize() throws Throwable { + if (handle != 0) { + rsDrop(); + } + super.finalize(); + } - @Nonnull - private native UUID rsGetId(); + private native void rsOpen(String modelPath); - @Nonnull - private native String rsGetMetasJson(); + @Nonnull + private native UUID rsGetId(); - private native void rsDrop(); + @Nonnull + private native String rsGetMetasJson(); + + private native void rsDrop(); + } /** 話者(speaker)のメタ情報。 */ public static class SpeakerMeta { diff --git a/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/MetaTest.java b/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/MetaTest.java index 60df7359f..ece3a87ff 100644 --- a/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/MetaTest.java +++ b/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/MetaTest.java @@ -14,7 +14,8 @@ void checkLoad() { // cwdはvoicevox_core/crates/voicevox_core_java_api/lib String cwd = System.getProperty("user.dir"); File path = new File(cwd + "/../../test_util/data/model/sample.vvm"); - VoiceModel model = new VoiceModel(path.getAbsolutePath()); - assertNotNull(model.metas); + try (VoiceModelFile model = new VoiceModelFile(path.getAbsolutePath())) { + assertNotNull(model.metas); + } } } diff --git a/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/SynthesizerTest.java b/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/SynthesizerTest.java index 0dfa17ea3..4c7d16f56 100644 --- a/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/SynthesizerTest.java +++ b/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/SynthesizerTest.java @@ -49,58 +49,62 @@ boolean checkAllMoras( @Test void checkModel() throws InvalidModelDataException { Onnxruntime onnxruntime = loadOnnxruntime(); - VoiceModel model = loadModel(); OpenJtalk openJtalk = loadOpenJtalk(); Synthesizer synthesizer = Synthesizer.builder(onnxruntime, openJtalk).build(); assertTrue(synthesizer.metas().length == 0); - synthesizer.loadVoiceModel(model); + try (VoiceModelFile model = openModel()) { + synthesizer.loadVoiceModel(model); - assertTrue(synthesizer.metas().length >= 1); - assertTrue(synthesizer.isLoadedVoiceModel(model.id)); + assertTrue(synthesizer.metas().length >= 1); + assertTrue(synthesizer.isLoadedVoiceModel(model.id)); - synthesizer.unloadVoiceModel(model.id); + synthesizer.unloadVoiceModel(model.id); - assertTrue(synthesizer.metas().length == 0); - assertFalse(synthesizer.isLoadedVoiceModel(model.id)); + assertTrue(synthesizer.metas().length == 0); + assertFalse(synthesizer.isLoadedVoiceModel(model.id)); + } } @Test void checkAudioQuery() throws RunModelException, InvalidModelDataException { - VoiceModel model = loadModel(); Onnxruntime onnxruntime = loadOnnxruntime(); OpenJtalk openJtalk = loadOpenJtalk(); Synthesizer synthesizer = Synthesizer.builder(onnxruntime, openJtalk).build(); - 
synthesizer.loadVoiceModel(model); - AudioQuery query = synthesizer.createAudioQuery("こんにちは", model.metas[0].styles[0].id); - synthesizer.synthesis(query, model.metas[0].styles[0].id).execute(); + try (VoiceModelFile model = openModel()) { + synthesizer.loadVoiceModel(model); + } + + AudioQuery query = synthesizer.createAudioQuery("こんにちは", synthesizer.metas()[0].styles[0].id); + synthesizer.synthesis(query, synthesizer.metas()[0].styles[0].id).execute(); } @Test void checkAccentPhrases() throws RunModelException, InvalidModelDataException { - VoiceModel model = loadModel(); OpenJtalk openJtalk = loadOpenJtalk(); Onnxruntime onnxruntime = loadOnnxruntime(); Synthesizer synthesizer = Synthesizer.builder(onnxruntime, openJtalk).build(); - synthesizer.loadVoiceModel(model); + try (VoiceModelFile model = openModel()) { + synthesizer.loadVoiceModel(model); + } List accentPhrases = - synthesizer.createAccentPhrases("こんにちは", model.metas[0].styles[0].id); + synthesizer.createAccentPhrases("こんにちは", synthesizer.metas()[0].styles[0].id); List accentPhrases2 = - synthesizer.replaceMoraPitch(accentPhrases, model.metas[1].styles[0].id); + synthesizer.replaceMoraPitch(accentPhrases, synthesizer.metas()[1].styles[0].id); assertTrue( checkAllMoras( accentPhrases, accentPhrases2, (mora, otherMora) -> mora.pitch != otherMora.pitch)); List accentPhrases3 = - synthesizer.replacePhonemeLength(accentPhrases, model.metas[1].styles[0].id); + synthesizer.replacePhonemeLength(accentPhrases, synthesizer.metas()[1].styles[0].id); assertTrue( checkAllMoras( accentPhrases, accentPhrases3, (mora, otherMora) -> mora.vowelLength != otherMora.vowelLength)); List accentPhrases4 = - synthesizer.replaceMoraData(accentPhrases, model.metas[1].styles[0].id); + synthesizer.replaceMoraData(accentPhrases, synthesizer.metas()[1].styles[0].id); assertTrue( checkAllMoras( accentPhrases, @@ -111,11 +115,12 @@ void checkAccentPhrases() throws RunModelException, InvalidModelDataException { @Test void checkTts() throws RunModelException, InvalidModelDataException { - VoiceModel model = loadModel(); Onnxruntime onnxruntime = loadOnnxruntime(); OpenJtalk openJtalk = loadOpenJtalk(); Synthesizer synthesizer = Synthesizer.builder(onnxruntime, openJtalk).build(); - synthesizer.loadVoiceModel(model); - synthesizer.tts("こんにちは", model.metas[0].styles[0].id); + try (VoiceModelFile model = openModel()) { + synthesizer.loadVoiceModel(model); + } + synthesizer.tts("こんにちは", synthesizer.metas()[0].styles[0].id); } } diff --git a/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/TestUtils.java b/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/TestUtils.java index 9ab731cd9..f505c327f 100644 --- a/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/TestUtils.java +++ b/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/TestUtils.java @@ -3,13 +3,13 @@ import java.io.File; class TestUtils { - VoiceModel loadModel() { + VoiceModelFile openModel() { // cwdはvoicevox_core/crates/voicevox_core_java_api/lib String cwd = System.getProperty("user.dir"); File path = new File(cwd + "/../../test_util/data/model/sample.vvm"); try { - return new VoiceModel(path.getCanonicalPath()); + return new VoiceModelFile(path.getCanonicalPath()); } catch (Exception e) { throw new RuntimeException(e); } diff --git a/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/UserDictTest.java 
b/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/UserDictTest.java index 2fcfc06ab..ed9a94e8e 100644 --- a/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/UserDictTest.java +++ b/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/UserDictTest.java @@ -15,21 +15,24 @@ class UserDictTest extends TestUtils { // 辞書ロード前後でkanaが異なることを確認する @Test void checkLoad() throws RunModelException, InvalidModelDataException, LoadUserDictException { - VoiceModel model = loadModel(); Onnxruntime onnxruntime = loadOnnxruntime(); OpenJtalk openJtalk = loadOpenJtalk(); Synthesizer synthesizer = Synthesizer.builder(onnxruntime, openJtalk).build(); UserDict userDict = new UserDict(); - synthesizer.loadVoiceModel(model); + try (VoiceModelFile model = openModel()) { + synthesizer.loadVoiceModel(model); + } AudioQuery query1 = synthesizer.createAudioQuery( - "this_word_should_not_exist_in_default_dictionary", model.metas[0].styles[0].id); + "this_word_should_not_exist_in_default_dictionary", + synthesizer.metas()[0].styles[0].id); userDict.addWord(new UserDict.Word("this_word_should_not_exist_in_default_dictionary", "テスト")); openJtalk.useUserDict(userDict); AudioQuery query2 = synthesizer.createAudioQuery( - "this_word_should_not_exist_in_default_dictionary", model.metas[0].styles[0].id); + "this_word_should_not_exist_in_default_dictionary", + synthesizer.metas()[0].styles[0].id); assertTrue(query1.kana != query2.kana); } diff --git a/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/VoiceModelTest.java b/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/VoiceModelTest.java index 5a720b07f..2bdba9c28 100644 --- a/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/VoiceModelTest.java +++ b/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/VoiceModelTest.java @@ -17,7 +17,10 @@ class VoiceModelTest extends TestUtils { @Test void idShouldBePreservedAsIs() throws IOException { UUID expected = UUID.fromString(Manifest.readJson().id); - UUID actual = loadModel().id; + UUID actual; + try (VoiceModelFile model = openModel()) { + actual = model.id; + } assertEquals(expected, actual); } diff --git a/crates/voicevox_core_java_api/src/synthesizer.rs b/crates/voicevox_core_java_api/src/synthesizer.rs index 9ebd98e47..98c4f02f0 100644 --- a/crates/voicevox_core_java_api/src/synthesizer.rs +++ b/crates/voicevox_core_java_api/src/synthesizer.rs @@ -107,7 +107,7 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_Synthesizer_rsLoadVoice ) { throw_if_err(env, (), |env| { let model = env - .get_rust_field::<_, _, Arc>(&model, "handle")? + .get_rust_field::<_, _, Arc>(&model, "handle")? 
.clone(); let internal = env .get_rust_field::<_, _, Arc>>( diff --git a/crates/voicevox_core_java_api/src/voice_model.rs b/crates/voicevox_core_java_api/src/voice_model.rs index 1ea90ba8c..66bd3ca0a 100644 --- a/crates/voicevox_core_java_api/src/voice_model.rs +++ b/crates/voicevox_core_java_api/src/voice_model.rs @@ -8,7 +8,9 @@ use jni::{ }; #[no_mangle] -unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_VoiceModel_rsFromPath<'local>( +unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_VoiceModelFile_00024Opened_rsOpen< + 'local, +>( env: JNIEnv<'local>, this: JObject<'local>, model_path: JString<'local>, @@ -17,7 +19,7 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_VoiceModel_rsFromPath<' let model_path = env.get_string(&model_path)?; let model_path = &*Cow::from(&model_path); - let internal = voicevox_core::blocking::VoiceModel::from_path(model_path)?; + let internal = voicevox_core::blocking::VoiceModelFile::open(model_path)?; env.set_rust_field(&this, "handle", Arc::new(internal))?; @@ -26,13 +28,15 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_VoiceModel_rsFromPath<' } #[no_mangle] -unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_VoiceModel_rsGetId<'local>( +unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_VoiceModelFile_00024Opened_rsGetId< + 'local, +>( env: JNIEnv<'local>, this: JObject<'local>, ) -> jobject { throw_if_err(env, std::ptr::null_mut(), |env| { let internal = env - .get_rust_field::<_, _, Arc>(&this, "handle")? + .get_rust_field::<_, _, Arc>(&this, "handle")? .clone(); let id = env.new_uuid(internal.id().raw_voice_model_id())?; @@ -42,13 +46,15 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_VoiceModel_rsGetId<'loc } #[no_mangle] -unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_VoiceModel_rsGetMetasJson<'local>( +unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_VoiceModelFile_00024Opened_rsGetMetasJson< + 'local, +>( env: JNIEnv<'local>, this: JObject<'local>, ) -> jobject { throw_if_err(env, std::ptr::null_mut(), |env| { let internal = env - .get_rust_field::<_, _, Arc>(&this, "handle")? + .get_rust_field::<_, _, Arc>(&this, "handle")? 
.clone(); let metas = internal.metas(); @@ -58,7 +64,9 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_VoiceModel_rsGetMetasJs } #[no_mangle] -unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_VoiceModel_rsDrop<'local>( +unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_VoiceModelFile_00024Opened_rsDrop< + 'local, +>( env: JNIEnv<'local>, this: JObject<'local>, ) { diff --git a/crates/voicevox_core_python_api/Cargo.toml b/crates/voicevox_core_python_api/Cargo.toml index e0877b623..c9605418e 100644 --- a/crates/voicevox_core_python_api/Cargo.toml +++ b/crates/voicevox_core_python_api/Cargo.toml @@ -8,8 +8,10 @@ publish.workspace = true crate-type = ["cdylib"] [dependencies] +blocking.workspace = true camino.workspace = true easy-ext.workspace = true +futures-util.workspace = true log.workspace = true once_cell.workspace = true pyo3 = { workspace = true, features = ["abi3-py38", "extension-module"] } diff --git a/crates/voicevox_core_python_api/python/test/test_asyncio_metas.py b/crates/voicevox_core_python_api/python/test/test_asyncio_metas.py index aea4af999..3b6f857e3 100644 --- a/crates/voicevox_core_python_api/python/test/test_asyncio_metas.py +++ b/crates/voicevox_core_python_api/python/test/test_asyncio_metas.py @@ -7,15 +7,15 @@ import conftest import pytest import pytest_asyncio -from voicevox_core.asyncio import Onnxruntime, OpenJtalk, Synthesizer, VoiceModel +from voicevox_core.asyncio import Onnxruntime, OpenJtalk, Synthesizer, VoiceModelFile -def test_voice_model_metas_works(voice_model: VoiceModel) -> None: +def test_voice_model_metas_works(voice_model: VoiceModelFile) -> None: _ = voice_model.metas @pytest.mark.asyncio -async def test_synthesizer_metas_works(voice_model: VoiceModel) -> None: +async def test_synthesizer_metas_works(voice_model: VoiceModelFile) -> None: synthesizer = Synthesizer( await Onnxruntime.load_once(filename=conftest.onnxruntime_filename), await OpenJtalk.new(conftest.open_jtalk_dic_dir), @@ -25,5 +25,5 @@ async def test_synthesizer_metas_works(voice_model: VoiceModel) -> None: @pytest_asyncio.fixture -async def voice_model() -> VoiceModel: - return await VoiceModel.from_path(conftest.model_dir) +async def voice_model() -> VoiceModelFile: + return await VoiceModelFile.open(conftest.model_dir) diff --git a/crates/voicevox_core_python_api/python/test/test_asyncio_user_dict_load.py b/crates/voicevox_core_python_api/python/test/test_asyncio_user_dict_load.py index d6906a6ac..b6fe50986 100644 --- a/crates/voicevox_core_python_api/python/test/test_asyncio_user_dict_load.py +++ b/crates/voicevox_core_python_api/python/test/test_asyncio_user_dict_load.py @@ -19,7 +19,7 @@ async def test_user_dict_load() -> None: filename=conftest.onnxruntime_filename ) open_jtalk = await voicevox_core.asyncio.OpenJtalk.new(conftest.open_jtalk_dic_dir) - model = await voicevox_core.asyncio.VoiceModel.from_path(conftest.model_dir) + model = await voicevox_core.asyncio.VoiceModelFile.open(conftest.model_dir) synthesizer = voicevox_core.asyncio.Synthesizer(onnxruntime, open_jtalk) await synthesizer.load_voice_model(model) diff --git a/crates/voicevox_core_python_api/python/test/test_blocking_metas.py b/crates/voicevox_core_python_api/python/test/test_blocking_metas.py index 00eade04b..a6aa6441d 100644 --- a/crates/voicevox_core_python_api/python/test/test_blocking_metas.py +++ b/crates/voicevox_core_python_api/python/test/test_blocking_metas.py @@ -6,14 +6,14 @@ import conftest import pytest -from voicevox_core.blocking import Onnxruntime, OpenJtalk, 
Synthesizer, VoiceModel +from voicevox_core.blocking import Onnxruntime, OpenJtalk, Synthesizer, VoiceModelFile -def test_voice_model_metas_works(voice_model: VoiceModel) -> None: +def test_voice_model_metas_works(voice_model: VoiceModelFile) -> None: _ = voice_model.metas -def test_synthesizer_metas_works(voice_model: VoiceModel) -> None: +def test_synthesizer_metas_works(voice_model: VoiceModelFile) -> None: synthesizer = Synthesizer( Onnxruntime.load_once(filename=conftest.onnxruntime_filename), OpenJtalk(conftest.open_jtalk_dic_dir), @@ -23,5 +23,5 @@ def test_synthesizer_metas_works(voice_model: VoiceModel) -> None: @pytest.fixture -def voice_model() -> VoiceModel: - return VoiceModel.from_path(conftest.model_dir) +def voice_model() -> VoiceModelFile: + return VoiceModelFile.open(conftest.model_dir) diff --git a/crates/voicevox_core_python_api/python/test/test_blocking_user_dict_load.py b/crates/voicevox_core_python_api/python/test/test_blocking_user_dict_load.py index 198becbe2..e8a5bd350 100644 --- a/crates/voicevox_core_python_api/python/test/test_blocking_user_dict_load.py +++ b/crates/voicevox_core_python_api/python/test/test_blocking_user_dict_load.py @@ -17,7 +17,7 @@ def test_user_dict_load() -> None: filename=conftest.onnxruntime_filename ) open_jtalk = voicevox_core.blocking.OpenJtalk(conftest.open_jtalk_dic_dir) - model = voicevox_core.blocking.VoiceModel.from_path(conftest.model_dir) + model = voicevox_core.blocking.VoiceModelFile.open(conftest.model_dir) synthesizer = voicevox_core.blocking.Synthesizer(onnxruntime, open_jtalk) synthesizer.load_voice_model(model) diff --git a/crates/voicevox_core_python_api/python/voicevox_core/_rust/asyncio.pyi b/crates/voicevox_core_python_api/python/voicevox_core/_rust/asyncio.pyi index 7652a7d2c..66c4d4b50 100644 --- a/crates/voicevox_core_python_api/python/voicevox_core/_rust/asyncio.pyi +++ b/crates/voicevox_core_python_api/python/voicevox_core/_rust/asyncio.pyi @@ -14,14 +14,14 @@ if TYPE_CHECKING: VoiceModelId, ) -class VoiceModel: +class VoiceModelFile: """ - 音声モデル。""" + 音声モデルファイル。""" @staticmethod - async def from_path(path: Union[str, PathLike[str]]) -> VoiceModel: + async def open(path: Union[str, PathLike[str]]) -> VoiceModelFile: """ - VVMファイルから ``VoiceModel`` を生成する。 + VVMファイルを開く。 Parameters ---------- @@ -29,6 +29,20 @@ class VoiceModel: VVMファイルへのパス。 """ ... + async def close(self) -> None: + """ + VVMファイルを閉じる。 + + Caveats + ------- + このメソッドが呼ばれた段階で :attr:`Synthesizer.load_voice_model` + などのアクセスが継続中の場合、例外が発生する。 + + 他の言語バインディング、例えばJava + APIではアクセスが全部終わるのを待ってから処理に移るようになっており、将来的にはPython + APIも同様になるかもしれない。 + """ + ... @property def id(self) -> VoiceModelId: """ID。""" @@ -37,6 +51,8 @@ class VoiceModel: def metas(self) -> List[SpeakerMeta]: """メタ情報。""" ... + async def __aenter__(self) -> "VoiceModelFile": ... + async def __aexit__(self, exc_type, exc_value, traceback) -> None: ... class Onnxruntime: """ @@ -174,7 +190,7 @@ class Synthesizer: def metas(self) -> List[SpeakerMeta]: """メタ情報。""" ... 
- async def load_voice_model(self, model: VoiceModel) -> None: + async def load_voice_model(self, model: VoiceModelFile) -> None: """ モデルを読み込む。 diff --git a/crates/voicevox_core_python_api/python/voicevox_core/_rust/blocking.pyi b/crates/voicevox_core_python_api/python/voicevox_core/_rust/blocking.pyi index 602ff31bc..88bdfa575 100644 --- a/crates/voicevox_core_python_api/python/voicevox_core/_rust/blocking.pyi +++ b/crates/voicevox_core_python_api/python/voicevox_core/_rust/blocking.pyi @@ -14,14 +14,14 @@ if TYPE_CHECKING: VoiceModelId, ) -class VoiceModel: +class VoiceModelFile: """ - 音声モデル。""" + 音声モデルファイル。""" @staticmethod - def from_path(path: Union[str, PathLike[str]]) -> VoiceModel: + def open(path: Union[str, PathLike[str]]) -> VoiceModelFile: """ - VVMファイルから ``VoiceModel`` を生成する。 + VVMファイルを開く。 Parameters ---------- @@ -29,6 +29,20 @@ class VoiceModel: VVMファイルへのパス。 """ ... + def close(self) -> None: + """ + VVMファイルを閉じる。 + + Caveats + ------- + このメソッドが呼ばれた段階で :attr:`Synthesizer.load_voice_model` + などのアクセスが継続中の場合、例外が発生する。 + + 他の言語バインディング、例えばJava + APIではアクセスが全部終わるのを待ってから処理に移るようになっており、将来的にはPython + APIも同様になるかもしれない。 + """ + ... @property def id(self) -> VoiceModelId: """ID。""" @@ -37,6 +51,8 @@ class VoiceModel: def metas(self) -> List[SpeakerMeta]: """メタ情報。""" ... + def __enter__(self) -> "VoiceModelFile": ... + def __exit__(self, exc_type, exc_value, traceback) -> None: ... class Onnxruntime: """ @@ -169,7 +185,7 @@ class Synthesizer: def metas(self) -> List[SpeakerMeta]: """メタ情報。""" ... - def load_voice_model(self, model: VoiceModel) -> None: + def load_voice_model(self, model: VoiceModelFile) -> None: """ モデルを読み込む。 diff --git a/crates/voicevox_core_python_api/python/voicevox_core/asyncio.py b/crates/voicevox_core_python_api/python/voicevox_core/asyncio.py index 2cff19cdf..0dc5e0adb 100644 --- a/crates/voicevox_core_python_api/python/voicevox_core/asyncio.py +++ b/crates/voicevox_core_python_api/python/voicevox_core/asyncio.py @@ -1,4 +1,4 @@ # pyright: reportMissingModuleSource=false -from ._rust.asyncio import Onnxruntime, OpenJtalk, Synthesizer, UserDict, VoiceModel +from ._rust.asyncio import Onnxruntime, OpenJtalk, Synthesizer, UserDict, VoiceModelFile -__all__ = ["Onnxruntime", "OpenJtalk", "Synthesizer", "UserDict", "VoiceModel"] +__all__ = ["Onnxruntime", "OpenJtalk", "Synthesizer", "UserDict", "VoiceModelFile"] diff --git a/crates/voicevox_core_python_api/python/voicevox_core/blocking.py b/crates/voicevox_core_python_api/python/voicevox_core/blocking.py index 7fed5fac7..01ea45029 100644 --- a/crates/voicevox_core_python_api/python/voicevox_core/blocking.py +++ b/crates/voicevox_core_python_api/python/voicevox_core/blocking.py @@ -1,4 +1,10 @@ # pyright: reportMissingModuleSource=false -from ._rust.blocking import Onnxruntime, OpenJtalk, Synthesizer, UserDict, VoiceModel +from ._rust.blocking import ( + Onnxruntime, + OpenJtalk, + Synthesizer, + UserDict, + VoiceModelFile, +) -__all__ = ["Onnxruntime", "OpenJtalk", "Synthesizer", "UserDict", "VoiceModel"] +__all__ = ["Onnxruntime", "OpenJtalk", "Synthesizer", "UserDict", "VoiceModelFile"] diff --git a/crates/voicevox_core_python_api/src/lib.rs b/crates/voicevox_core_python_api/src/lib.rs index c09fafdc8..d1018adf6 100644 --- a/crates/voicevox_core_python_api/src/lib.rs +++ b/crates/voicevox_core_python_api/src/lib.rs @@ -1,4 +1,4 @@ -use std::marker::PhantomData; +use std::{fmt::Display, marker::PhantomData, mem, sync::Arc}; mod convert; use self::convert::{from_utf8_path, VoicevoxCoreResultExt as _}; @@ -27,7 +27,7 @@ fn 
rust(py: Python<'_>, module: &PyModule) -> PyResult<()> { blocking_module.add_class::()?; blocking_module.add_class::()?; blocking_module.add_class::()?; - blocking_module.add_class::()?; + blocking_module.add_class::()?; blocking_module.add_class::()?; module.add_and_register_submodule(blocking_module)?; @@ -35,7 +35,7 @@ fn rust(py: Python<'_>, module: &PyModule) -> PyResult<()> { asyncio_module.add_class::()?; asyncio_module.add_class::()?; asyncio_module.add_class::()?; - asyncio_module.add_class::()?; + asyncio_module.add_class::()?; asyncio_module.add_class::()?; module.add_and_register_submodule(asyncio_module) } @@ -116,17 +116,41 @@ impl Closable { } } - fn close(&mut self) { + #[must_use = "中身は明示的に`drop`でdropすること"] + fn close(&mut self) -> Option { if matches!(self.content, MaybeClosed::Open(_)) { debug!("Closing a {}", C::NAME); } - self.content = MaybeClosed::Closed; + match mem::replace(&mut self.content, MaybeClosed::Closed) { + MaybeClosed::Open(content) => Some(content), + MaybeClosed::Closed => None, + } + } +} + +impl Closable, C> { + fn close_arc(&mut self, display: F) -> PyResult> + where + F: FnOnce(&T) -> M, + M: Display, + { + self.close() + .map(|this| { + let display = display(&this); + Arc::into_inner(this).ok_or_else(|| { + PyException::new_err(format!( + "この`{}` ({display})はまだ使われています", + C::NAME, + )) + }) + }) + .transpose() } } impl Drop for Closable { fn drop(&mut self) { - self.close(); + drop(self.close()); } } @@ -159,29 +183,63 @@ mod blocking { #[pyclass] #[derive(Clone)] - pub(crate) struct VoiceModel { - model: Arc, + pub(crate) struct VoiceModelFile { + model: Arc, Self>>>, } #[pymethods] - impl VoiceModel { + impl VoiceModelFile { #[staticmethod] - fn from_path(py: Python<'_>, path: PathBuf) -> PyResult { - let model = voicevox_core::blocking::VoiceModel::from_path(path) + fn open(py: Python<'_>, path: PathBuf) -> PyResult { + let model = voicevox_core::blocking::VoiceModelFile::open(path) .into_py_result(py)? .into(); + let model = std::sync::Mutex::new(Closable::new(model)).into(); Ok(Self { model }) } + fn close(&self) -> PyResult<()> { + let this = self.lock().close_arc(|this| this.id())?; + drop(this); + Ok(()) + } + #[getter] fn id(&self, py: Python<'_>) -> PyResult { - let id = self.model.id().raw_voice_model_id(); + let id = self.lock().get()?.id().raw_voice_model_id(); crate::convert::to_py_uuid(py, id) } #[getter] - fn metas<'py>(&self, py: Python<'py>) -> Vec<&'py PyAny> { - crate::convert::to_pydantic_voice_model_meta(self.model.metas(), py).unwrap() + fn metas<'py>(&self, py: Python<'py>) -> PyResult> { + let this = self.lock().get()?.clone(); + crate::convert::to_pydantic_voice_model_meta(this.metas(), py) + } + + fn __enter__(slf: PyRef<'_, Self>) -> PyResult> { + slf.lock().get()?; + Ok(slf) + } + + fn __exit__( + &self, + #[expect(unused_variables, reason = "`__exit__`としては必要")] exc_type: &PyAny, + #[expect(unused_variables, reason = "`__exit__`としては必要")] exc_value: &PyAny, + #[expect(unused_variables, reason = "`__exit__`としては必要")] traceback: &PyAny, + ) -> PyResult<()> { + self.close() + } + } + + impl VoiceModelFile { + /// # Panics + /// + /// `Mutex`が"poisoned"な状態なときパニックする。 + fn lock( + &self, + ) -> std::sync::MutexGuard<'_, Closable, Self>> + { + self.model.lock().unwrap_or_else(|e| panic!("{e}")) } } @@ -349,11 +407,9 @@ mod blocking { } fn load_voice_model(&mut self, model: &PyAny, py: Python<'_>) -> PyResult<()> { - let model: VoiceModel = model.extract()?; - self.synthesizer - .get()? 
- .load_voice_model(&model.model) - .into_py_result(py) + let this = self.synthesizer.get()?; + let model = &model.extract::()?.lock().get()?.clone(); + this.load_voice_model(model).into_py_result(py) } fn unload_voice_model( @@ -567,7 +623,7 @@ mod blocking { } fn close(&mut self) { - self.synthesizer.close() + drop(self.synthesizer.close()); } } @@ -646,6 +702,7 @@ mod asyncio { use std::{ffi::OsString, path::PathBuf, sync::Arc}; use camino::Utf8PathBuf; + use futures_util::FutureExt as _; use pyo3::{ pyclass, pymethods, types::{IntoPyDict as _, PyBytes, PyDict, PyList}, @@ -661,30 +718,77 @@ mod asyncio { #[pyclass] #[derive(Clone)] - pub(crate) struct VoiceModel { - model: Arc, + pub(crate) struct VoiceModelFile { + model: + Arc, Self>>>, } #[pymethods] - impl VoiceModel { + impl VoiceModelFile { #[staticmethod] - fn from_path(py: Python<'_>, path: PathBuf) -> PyResult<&PyAny> { + fn open(py: Python<'_>, path: PathBuf) -> PyResult<&PyAny> { pyo3_asyncio::tokio::future_into_py(py, async move { - let model = voicevox_core::nonblocking::VoiceModel::from_path(path).await; - let model = Python::with_gil(|py| model.into_py_result(py))?.into(); + let model = voicevox_core::nonblocking::VoiceModelFile::open(path).await; + let model = std::sync::Mutex::new(Closable::new( + Python::with_gil(|py| model.into_py_result(py))?.into(), + )) + .into(); Ok(Self { model }) }) } + fn close<'py>(&self, py: Python<'py>) -> PyResult<&'py PyAny> { + let this = self.lock().close_arc(|this| this.id())?; + match this { + Some(this) => pyo3_asyncio::tokio::future_into_py(py, this.close().map(Ok)), + None => pyo3_asyncio::tokio::future_into_py(py, async { Ok(()) }), + } + } + #[getter] fn id(&self, py: Python<'_>) -> PyResult { - let id = self.model.id().raw_voice_model_id(); + let id = self.lock().get()?.id().raw_voice_model_id(); crate::convert::to_py_uuid(py, id) } #[getter] - fn metas<'py>(&self, py: Python<'py>) -> Vec<&'py PyAny> { - crate::convert::to_pydantic_voice_model_meta(self.model.metas(), py).unwrap() + fn metas<'py>(&self, py: Python<'py>) -> PyResult> { + let this = self.lock().get()?.clone(); + crate::convert::to_pydantic_voice_model_meta(this.metas(), py) + } + + fn __aenter__(slf: PyRef<'_, Self>) -> PyResult<&PyAny> { + let py = slf.py(); + let asyncio_future = py.import("asyncio")?.getattr("Future")?; + slf.lock().get()?; + let running_loop = pyo3_asyncio::get_running_loop(py)?; + let fut = asyncio_future.call((), Some([("loop", running_loop)].into_py_dict(py)))?; + fut.call_method1("set_result", (slf,))?; + Ok(fut) + } + + fn __aexit__<'py>( + &self, + #[expect(unused_variables, reason = "`__aexit__`としては必要")] exc_type: &'py PyAny, + #[expect(unused_variables, reason = "`__aexit__`としては必要")] exc_value: &'py PyAny, + #[expect(unused_variables, reason = "`__aexit__`としては必要")] traceback: &'py PyAny, + py: Python<'py>, + ) -> PyResult<&'py PyAny> { + self.close(py) + } + } + + impl VoiceModelFile { + /// # Panics + /// + /// `Mutex`が"poisoned"な状態なときパニックする。 + fn lock( + &self, + ) -> std::sync::MutexGuard< + '_, + Closable, Self>, + > { + self.model.lock().unwrap_or_else(|e| panic!("{e}")) } } @@ -866,10 +970,11 @@ mod asyncio { model: &'py PyAny, py: Python<'py>, ) -> PyResult<&'py PyAny> { - let model: VoiceModel = model.extract()?; + let model: VoiceModelFile = model.extract()?; let synthesizer = self.synthesizer.get()?.clone(); pyo3_asyncio::tokio::future_into_py(py, async move { - let result = synthesizer.load_voice_model(&model.model).await; + let model = &model.lock().get()?.clone(); + let 
result = synthesizer.load_voice_model(model).await; Python::with_gil(|py| result.into_py_result(py)) }) } @@ -1145,7 +1250,7 @@ mod asyncio { } fn close(&mut self) { - self.synthesizer.close() + drop(self.synthesizer.close()); } } diff --git a/docs/usage.md b/docs/usage.md index 067250126..26ed50810 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -62,15 +62,15 @@ VOICEVOX コアでは`Synthesizer`に音声モデルを読み込むことでテ ```python from pprint import pprint -from voicevox_core.blocking import Onnxruntime, OpenJtalk, Synthesizer, VoiceModel +from voicevox_core.blocking import Onnxruntime, OpenJtalk, Synthesizer, VoiceModelFile # 1. Synthesizerの初期化 open_jtalk_dict_dir = "open_jtalk_dic_utf_8-1.11" synthesizer = Synthesizer(Onnxruntime.load_once(), OpenJtalk(open_jtalk_dict_dir)) # 2. 音声モデルの読み込み -model = VoiceModel.from_path("model/0.vvm") -synthesizer.load_voice_model(model) +with VoiceModelFile.open("model/0.vvm") as model: + synthesizer.load_voice_model(model) # 3. テキスト音声合成 text = "サンプル音声です" @@ -86,11 +86,11 @@ AIエンジンの`Onnxruntime`のインスタンスと、辞書などを取り ### 2. 音声モデルの読み込み -VVM ファイルから`VoiceModel`インスタンスを作成し、`Synthesizer`に読み込ませます。その VVM ファイルにどの声が含まれているかは`VoiceModel`の`.metas`や[音声モデルと声の対応表](https://github.com/VOICEVOX/voicevox_fat_resource/blob/main/core/model/README.md#%E9%9F%B3%E5%A3%B0%E3%83%A2%E3%83%87%E3%83%ABvvm%E3%83%95%E3%82%A1%E3%82%A4%E3%83%AB%E3%81%A8%E5%A3%B0%E3%82%AD%E3%83%A3%E3%83%A9%E3%82%AF%E3%82%BF%E3%83%BC%E3%82%B9%E3%82%BF%E3%82%A4%E3%83%AB%E5%90%8D%E3%81%A8%E3%82%B9%E3%82%BF%E3%82%A4%E3%83%AB-id-%E3%81%AE%E5%AF%BE%E5%BF%9C%E8%A1%A8)で確認できます。 +VVM ファイルから`VoiceModelFile`インスタンスを作成し、`Synthesizer`に読み込ませます。その VVM ファイルにどの声が含まれているかは`VoiceModelFile`の`.metas`や[音声モデルと声の対応表](https://github.com/VOICEVOX/voicevox_fat_resource/blob/main/core/model/README.md#%E9%9F%B3%E5%A3%B0%E3%83%A2%E3%83%87%E3%83%ABvvm%E3%83%95%E3%82%A1%E3%82%A4%E3%83%AB%E3%81%A8%E5%A3%B0%E3%82%AD%E3%83%A3%E3%83%A9%E3%82%AF%E3%82%BF%E3%83%BC%E3%82%B9%E3%82%BF%E3%82%A4%E3%83%AB%E5%90%8D%E3%81%A8%E3%82%B9%E3%82%BF%E3%82%A4%E3%83%AB-id-%E3%81%AE%E5%AF%BE%E5%BF%9C%E8%A1%A8)で確認できます。 ```python -model = VoiceModel.from_path("model/0.vvm") -pprint(model.metas) +with VoiceModelFile.open("model/0.vvm") as model: + pprint(model.metas) ``` ```txt diff --git a/example/cpp/unix/simple_tts.cpp b/example/cpp/unix/simple_tts.cpp index 5db24b12e..210df1549 100644 --- a/example/cpp/unix/simple_tts.cpp +++ b/example/cpp/unix/simple_tts.cpp @@ -47,8 +47,8 @@ int main(int argc, char *argv[]) { if (path.extension() != ".vvm") { continue; } - VoicevoxVoiceModel* model; - result = voicevox_voice_model_new_from_path(path.c_str(), &model); + VoicevoxVoiceModelFile* model; + result = voicevox_voice_model_file_open(path.c_str(), &model); if (result != VoicevoxResultCode::VOICEVOX_RESULT_OK) { std::cerr << voicevox_error_result_to_message(result) << std::endl; return 0; @@ -58,7 +58,7 @@ int main(int argc, char *argv[]) { std::cerr << voicevox_error_result_to_message(result) << std::endl; return 0; } - voicevox_voice_model_delete(model); + voicevox_voice_model_file_close(model); } std::cout << "音声生成中..." 
<< std::endl; diff --git a/example/cpp/windows/simple_tts/simple_tts.cpp b/example/cpp/windows/simple_tts/simple_tts.cpp index 946ef9679..2bdc947c6 100644 --- a/example/cpp/windows/simple_tts/simple_tts.cpp +++ b/example/cpp/windows/simple_tts/simple_tts.cpp @@ -59,9 +59,8 @@ int main() { if (path.extension() != ".vvm") { continue; } - VoicevoxVoiceModel* model; - result = voicevox_voice_model_new_from_path(path.generic_u8string().c_str(), - &model); + VoicevoxVoiceModelFile* model; + result = voicevox_voice_model_file_open(path.generic_u8string().c_str(), &model); if (result != VoicevoxResultCode::VOICEVOX_RESULT_OK) { OutErrorMessage(result); return 0; @@ -71,7 +70,7 @@ int main() { OutErrorMessage(result); return 0; } - voicevox_voice_model_delete(model); + voicevox_voice_model_file_close(model); } std::wcout << L"音声生成中" << std::endl; diff --git a/example/kotlin/app/src/main/kotlin/app/App.kt b/example/kotlin/app/src/main/kotlin/app/App.kt index 7f2651020..0b8d05e33 100644 --- a/example/kotlin/app/src/main/kotlin/app/App.kt +++ b/example/kotlin/app/src/main/kotlin/app/App.kt @@ -47,7 +47,7 @@ fun main(args: Array) { .build() println("Loading: ${vvmPath}") - val vvm = VoiceModel(vvmPath) + val vvm = VoiceModelFile(vvmPath) synthesizer.loadVoiceModel(vvm) println("Creating an AudioQuery from the text: ${text}") diff --git a/example/python/run-asyncio.py b/example/python/run-asyncio.py index b75509183..176ac290f 100644 --- a/example/python/run-asyncio.py +++ b/example/python/run-asyncio.py @@ -9,7 +9,7 @@ from typing import Tuple from voicevox_core import AccelerationMode, AudioQuery -from voicevox_core.asyncio import Onnxruntime, OpenJtalk, Synthesizer, VoiceModel +from voicevox_core.asyncio import Onnxruntime, OpenJtalk, Synthesizer, VoiceModelFile async def main() -> None: @@ -45,8 +45,8 @@ async def main() -> None: logger.debug("%s", f"{synthesizer.is_gpu_mode=}") logger.info("%s", f"Loading `{vvm_path}`") - model = await VoiceModel.from_path(vvm_path) - await synthesizer.load_voice_model(model) + async with await VoiceModelFile.open(vvm_path) as model: + await synthesizer.load_voice_model(model) logger.info("%s", f"Creating an AudioQuery from {text!r}") audio_query = await synthesizer.audio_query(text, style_id) diff --git a/example/python/run.py b/example/python/run.py index 3a9fdd9e7..5f11a1a62 100644 --- a/example/python/run.py +++ b/example/python/run.py @@ -6,7 +6,7 @@ from typing import Tuple from voicevox_core import AccelerationMode, AudioQuery -from voicevox_core.blocking import Onnxruntime, OpenJtalk, Synthesizer, VoiceModel +from voicevox_core.blocking import Onnxruntime, OpenJtalk, Synthesizer, VoiceModelFile def main() -> None: @@ -42,8 +42,8 @@ def main() -> None: logger.debug("%s", f"{synthesizer.is_gpu_mode=}") logger.info("%s", f"Loading `{vvm_path}`") - model = VoiceModel.from_path(vvm_path) - synthesizer.load_voice_model(model) + with VoiceModelFile.open(vvm_path) as model: + synthesizer.load_voice_model(model) logger.info("%s", f"Creating an AudioQuery from {text!r}") audio_query = synthesizer.audio_query(text, style_id)
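
Putting the API surface changed above together, here is a minimal end-to-end sketch of the renamed blocking Python API. It is assembled from the `VoiceModelFile` docstrings and the `docs/usage.md` changes in this patch; it assumes a build of this branch is installed and that the `model/0.vvm` and `open_jtalk_dic_utf_8-1.11` paths from the documentation example exist, and the post-`close()` behaviour is as described in the new Caveats sections rather than verified here.

```python
from pprint import pprint

from voicevox_core.blocking import Onnxruntime, OpenJtalk, Synthesizer, VoiceModelFile

# Initialization as in docs/usage.md; the dictionary directory is a placeholder.
synthesizer = Synthesizer(Onnxruntime.load_once(), OpenJtalk("open_jtalk_dic_utf_8-1.11"))

# `VoiceModelFile.open` replaces `VoiceModel.from_path`. Leaving the `with`
# block closes the VVM file, which is equivalent to calling `model.close()`.
with VoiceModelFile.open("model/0.vvm") as model:
    pprint(model.metas)  # metadata is readable while the file is open
    synthesizer.load_voice_model(model)

# The synthesizer keeps the voices it has loaded; only the file handle is gone.
# Per the Caveats in the new docstrings, touching `model` after this point
# (e.g. `model.metas`) raises an exception because the file has been closed.
```

The asyncio API follows the same shape, except that `VoiceModelFile.open` is awaited and the file is managed with `async with`, as in the `example/python/run-asyncio.py` hunk above.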