Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CとJavaのブロッキングAPIを実装 #705

Merged
merged 10 commits into from
Dec 10, 2023
2 changes: 0 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion crates/voicevox_core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true, features = ["preserve_order"] }
tempfile.workspace = true
thiserror.workspace = true
tokio = { workspace = true, features = ["rt"] }
tokio = { workspace = true, features = ["rt"] } # FIXME: feature-gateする
tracing.workspace = true
uuid = { workspace = true, features = ["v4", "serde"] }
voicevox_core_macros = { path = "../voicevox_core_macros" }
Expand Down
54 changes: 9 additions & 45 deletions crates/voicevox_core/src/synthesizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,8 @@ impl<O> self::blocking::Synthesizer<O> {
self.status.is_loaded_model(voice_model_id)
}

fn is_loaded_model_by_style_id(&self, style_id: StyleId) -> bool {
#[doc(hidden)]
pub fn is_loaded_model_by_style_id(&self, style_id: StyleId) -> bool {
self.status.is_loaded_model_by_style_id(style_id)
}

Expand Down Expand Up @@ -1039,47 +1040,6 @@ pub trait PerformInference {
) -> Result<Vec<f32>>;
}

/// `PerformInference` for the async-facing synthesizer wrapper.
///
/// Every method is a thin pass-through: the arguments are handed unchanged to
/// the same-named method on the wrapped value `self.0`, and its `Result` is
/// returned as-is.
// NOTE(review): `self.0` is presumably the `blocking::Synthesizer<O>` that
// carries the real implementation — confirm against the struct definition.
impl<O> PerformInference for self::tokio::Synthesizer<O> {
/// Forwards `phoneme_vector` and `style_id` to `self.0.predict_duration`.
fn predict_duration(&self, phoneme_vector: &[i64], style_id: StyleId) -> Result<Vec<f32>> {
self.0.predict_duration(phoneme_vector, style_id)
}

/// Forwards all arguments, in the same order, to `self.0.predict_intonation`.
fn predict_intonation(
&self,
length: usize,
vowel_phoneme_vector: &[i64],
consonant_phoneme_vector: &[i64],
start_accent_vector: &[i64],
end_accent_vector: &[i64],
start_accent_phrase_vector: &[i64],
end_accent_phrase_vector: &[i64],
style_id: StyleId,
) -> Result<Vec<f32>> {
self.0.predict_intonation(
length,
vowel_phoneme_vector,
consonant_phoneme_vector,
start_accent_vector,
end_accent_vector,
start_accent_phrase_vector,
end_accent_phrase_vector,
style_id,
)
}

/// Forwards all arguments, in the same order, to `self.0.decode`.
fn decode(
&self,
length: usize,
phoneme_size: usize,
f0: &[f32],
phoneme_vector: &[f32],
style_id: StyleId,
) -> Result<Vec<f32>> {
self.0
.decode(length, phoneme_size, f0, phoneme_vector, style_id)
}
}

impl<O> PerformInference for self::blocking::Synthesizer<O> {
fn predict_duration(&self, phoneme_vector: &[i64], style_id: StyleId) -> Result<Vec<f32>> {
// FIXME: `Status::ids_for`があるため、ここは不要なはず
Expand Down Expand Up @@ -1516,7 +1476,9 @@ mod tests {
30, 35, 14, 23, 7, 21, 14, 43, 30, 30, 23, 30, 35, 30, 0,
];

let result = syntesizer.predict_duration(&phoneme_vector, StyleId::new(1));
let result = syntesizer
.0
.predict_duration(&phoneme_vector, StyleId::new(1));

assert!(result.is_ok(), "{result:?}");
assert_eq!(result.unwrap().len(), phoneme_vector.len());
Expand Down Expand Up @@ -1546,7 +1508,7 @@ mod tests {
let start_accent_phrase_vector = [0, 1, 0, 0, 0];
let end_accent_phrase_vector = [0, 0, 0, 1, 0];

let result = syntesizer.predict_intonation(
let result = syntesizer.0.predict_intonation(
vowel_phoneme_vector.len(),
&vowel_phoneme_vector,
&consonant_phoneme_vector,
Expand Down Expand Up @@ -1599,7 +1561,9 @@ mod tests {
set_one(30, 45..60);
set_one(0, 60..69);

let result = syntesizer.decode(F0_LENGTH, PHONEME_SIZE, &f0, &phoneme, StyleId::new(1));
let result = syntesizer
.0
.decode(F0_LENGTH, PHONEME_SIZE, &f0, &phoneme, StyleId::new(1));

assert!(result.is_ok(), "{result:?}");
assert_eq!(result.unwrap().len(), F0_LENGTH * 256);
Expand Down
1 change: 0 additions & 1 deletion crates/voicevox_core_c_api/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ once_cell.workspace = true
process_path.workspace = true
serde_json = { workspace = true, features = ["preserve_order"] }
thiserror.workspace = true
tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
tracing.workspace = true
tracing-subscriber = { workspace = true, features = ["env-filter"] }
uuid.workspace = true
Expand Down
20 changes: 8 additions & 12 deletions crates/voicevox_core_c_api/src/c_impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,29 +5,25 @@ use voicevox_core::{InitializeOptions, Result, VoiceModelId};
use crate::{CApiResult, OpenJtalkRc, VoicevoxSynthesizer, VoicevoxVoiceModel};

impl OpenJtalkRc {
pub(crate) async fn new(open_jtalk_dic_dir: impl AsRef<Path>) -> Result<Self> {
pub(crate) fn new(open_jtalk_dic_dir: impl AsRef<Path>) -> Result<Self> {
Ok(Self {
open_jtalk: voicevox_core::tokio::OpenJtalk::new(open_jtalk_dic_dir).await?,
open_jtalk: voicevox_core::blocking::OpenJtalk::new(open_jtalk_dic_dir)?,
})
}
}

impl VoicevoxSynthesizer {
pub(crate) fn new(open_jtalk: &OpenJtalkRc, options: &InitializeOptions) -> Result<Self> {
// ロガーを起動
// FIXME: `into_result_code_with_error`を`run`とかに改名し、`init_logger`をその中に移動
let _ = *crate::RUNTIME;

let synthesizer =
voicevox_core::tokio::Synthesizer::new(open_jtalk.open_jtalk.clone(), options)?;
voicevox_core::blocking::Synthesizer::new(open_jtalk.open_jtalk.clone(), options)?;
Ok(Self { synthesizer })
}

pub(crate) async fn load_voice_model(
pub(crate) fn load_voice_model(
&self,
model: &voicevox_core::tokio::VoiceModel,
model: &voicevox_core::blocking::VoiceModel,
) -> CApiResult<()> {
self.synthesizer.load_voice_model(model).await?;
self.synthesizer.load_voice_model(model)?;
Ok(())
}

Expand All @@ -43,8 +39,8 @@ impl VoicevoxSynthesizer {
}

impl VoicevoxVoiceModel {
pub(crate) async fn from_path(path: impl AsRef<Path>) -> Result<Self> {
let model = voicevox_core::tokio::VoiceModel::from_path(path).await?;
pub(crate) fn from_path(path: impl AsRef<Path>) -> Result<Self> {
let model = voicevox_core::blocking::VoiceModel::from_path(path)?;
let id = CString::new(model.id().raw_voice_model_id().as_str()).unwrap();
let metas = CString::new(serde_json::to_string(model.metas()).unwrap()).unwrap();
Ok(Self { model, id, metas })
Expand Down
42 changes: 23 additions & 19 deletions crates/voicevox_core_c_api/src/compatible_engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@ macro_rules! ensure_initialized {
static ERROR_MESSAGE: Lazy<Mutex<String>> = Lazy::new(|| Mutex::new(String::new()));

struct VoiceModelSet {
all_vvms: Vec<voicevox_core::tokio::VoiceModel>,
all_vvms: Vec<voicevox_core::blocking::VoiceModel>,
all_metas_json: CString,
style_model_map: BTreeMap<StyleId, VoiceModelId>,
model_map: BTreeMap<VoiceModelId, voicevox_core::tokio::VoiceModel>,
model_map: BTreeMap<VoiceModelId, voicevox_core::blocking::VoiceModel>,
}

static VOICE_MODEL_SET: Lazy<VoiceModelSet> = Lazy::new(|| {
let all_vvms = RUNTIME.block_on(get_all_models());
let all_vvms = get_all_models();
let model_map: BTreeMap<_, _> = all_vvms
.iter()
.map(|vvm| (vvm.id().clone(), vvm.clone()))
Expand All @@ -52,7 +52,7 @@ static VOICE_MODEL_SET: Lazy<VoiceModelSet> = Lazy::new(|| {
/// # Panics
///
/// 失敗したらパニックする
async fn get_all_models() -> Vec<voicevox_core::tokio::VoiceModel> {
fn get_all_models() -> Vec<voicevox_core::blocking::VoiceModel> {
let root_dir = if let Some(root_dir) = env::var_os(ROOT_DIR_ENV_NAME) {
root_dir.into()
} else {
Expand All @@ -64,17 +64,13 @@ static VOICE_MODEL_SET: Lazy<VoiceModelSet> = Lazy::new(|| {
.join("model")
};

let vvm_paths = root_dir
root_dir
.read_dir()
.and_then(|entries| entries.collect::<std::result::Result<Vec<_>, _>>())
.unwrap_or_else(|e| panic!("{}が読めませんでした: {e}", root_dir.display()))
.into_iter()
.filter(|entry| entry.path().extension().map_or(false, |ext| ext == "vvm"))
.map(|entry| voicevox_core::tokio::VoiceModel::from_path(entry.path()));

futures::future::join_all(vvm_paths)
.await
.into_iter()
.map(|entry| voicevox_core::blocking::VoiceModel::from_path(entry.path()))
.collect::<std::result::Result<_, _>>()
.unwrap()
}
Expand All @@ -88,10 +84,10 @@ fn voice_model_set() -> &'static VoiceModelSet {
&VOICE_MODEL_SET
}

static SYNTHESIZER: Lazy<Mutex<Option<voicevox_core::tokio::Synthesizer<()>>>> =
static SYNTHESIZER: Lazy<Mutex<Option<voicevox_core::blocking::Synthesizer<()>>>> =
Lazy::new(|| Mutex::new(None));

fn lock_synthesizer() -> MutexGuard<'static, Option<voicevox_core::tokio::Synthesizer<()>>> {
fn lock_synthesizer() -> MutexGuard<'static, Option<voicevox_core::blocking::Synthesizer<()>>> {
SYNTHESIZER.lock().unwrap()
}

Expand All @@ -104,10 +100,9 @@ fn set_message(message: &str) {

#[no_mangle]
pub extern "C" fn initialize(use_gpu: bool, cpu_num_threads: c_int, load_all_models: bool) -> bool {
// FIXME: ここはもう`RUNTIME.block_on`で包む必要は無くなっているのだが、ロガーの設定を`RUNTIME`
// で行っているという構造になってしまっているので、外すとロガーの初期化が遅れてしまう
let result = RUNTIME.block_on(async {
let synthesizer = voicevox_core::tokio::Synthesizer::new(
init_logger_once();
let result = (|| {
let synthesizer = voicevox_core::blocking::Synthesizer::new(
(),
&voicevox_core::InitializeOptions {
acceleration_mode: if use_gpu {
Expand All @@ -121,12 +116,12 @@ pub extern "C" fn initialize(use_gpu: bool, cpu_num_threads: c_int, load_all_mod

if load_all_models {
for model in &voice_model_set().all_vvms {
synthesizer.load_voice_model(model).await?;
synthesizer.load_voice_model(model)?;
}
}

Ok::<_, voicevox_core::Error>(synthesizer)
});
})();

match result {
Ok(synthesizer) => {
Expand All @@ -142,12 +137,13 @@ pub extern "C" fn initialize(use_gpu: bool, cpu_num_threads: c_int, load_all_mod

#[no_mangle]
pub extern "C" fn load_model(style_id: i64) -> bool {
init_logger_once();
let style_id = StyleId::new(style_id as u32);
let model_set = voice_model_set();
if let Some(model_id) = model_set.style_model_map.get(&style_id) {
let vvm = model_set.model_map.get(model_id).unwrap();
let synthesizer = &mut *lock_synthesizer();
let result = RUNTIME.block_on(ensure_initialized!(synthesizer).load_voice_model(vvm));
let result = ensure_initialized!(synthesizer).load_voice_model(vvm);
if let Some(err) = result.err() {
set_message(&format!("{err}"));
false
Expand All @@ -162,28 +158,33 @@ pub extern "C" fn load_model(style_id: i64) -> bool {

#[no_mangle]
pub extern "C" fn is_model_loaded(speaker_id: i64) -> bool {
init_logger_once();
ensure_initialized!(&*lock_synthesizer())
.is_loaded_model_by_style_id(StyleId::new(speaker_id as u32))
}

/// C entry point: discards the global synthesizer.
///
/// The slot behind `lock_synthesizer()` is overwritten with `None` while the
/// lock is held, which drops whatever synthesizer was stored there.
#[no_mangle]
pub extern "C" fn finalize() {
    init_logger_once();
    let mut slot = lock_synthesizer();
    *slot = None;
}

/// C entry point: returns the metadata JSON of all voice models as a C string.
///
/// The pointer comes from the `CString` field `all_metas_json` of the
/// `&'static VoiceModelSet` returned by `voice_model_set()`, so the caller must
/// not free it.
#[no_mangle]
pub extern "C" fn metas() -> *const c_char {
    init_logger_once();
    voice_model_set().all_metas_json.as_ptr()
}

/// C entry point: returns a pointer to the bytes of the global `ERROR_MESSAGE`
/// string.
///
/// # Panics
///
/// Panics if the `ERROR_MESSAGE` mutex is poisoned (`lock().unwrap()`).
// NOTE(review): the mutex guard is released when this function returns, so the
// pointer is only meaningful until the message is next rewritten — confirm that
// callers copy it promptly.
// NOTE(review): a Rust `String` is not NUL-terminated by itself; this relies on
// whoever writes `ERROR_MESSAGE` appending the terminator — verify in
// `set_message`.
#[no_mangle]
pub extern "C" fn last_error_message() -> *const c_char {
    init_logger_once();
    let message = ERROR_MESSAGE.lock().unwrap();
    message.as_ptr().cast::<c_char>()
}

#[no_mangle]
pub extern "C" fn supported_devices() -> *const c_char {
init_logger_once();
return SUPPORTED_DEVICES.as_ptr();

static SUPPORTED_DEVICES: Lazy<CString> = Lazy::new(|| {
Expand All @@ -198,6 +199,7 @@ pub extern "C" fn yukarin_s_forward(
speaker_id: *mut i64,
output: *mut f32,
) -> bool {
init_logger_once();
let synthesizer = &*lock_synthesizer();
let result = ensure_initialized!(synthesizer).predict_duration(
unsafe { std::slice::from_raw_parts_mut(phoneme_list, length as usize) },
Expand Down Expand Up @@ -228,6 +230,7 @@ pub extern "C" fn yukarin_sa_forward(
speaker_id: *mut i64,
output: *mut f32,
) -> bool {
init_logger_once();
let synthesizer = &*lock_synthesizer();
let result = ensure_initialized!(synthesizer).predict_intonation(
length as usize,
Expand Down Expand Up @@ -261,6 +264,7 @@ pub extern "C" fn decode_forward(
speaker_id: *mut i64,
output: *mut f32,
) -> bool {
init_logger_once();
let length = length as usize;
let phoneme_size = phoneme_size as usize;
let synthesizer = &*lock_synthesizer();
Expand Down
Loading
Loading