Skip to content

Commit

Permalink
feat!: [downloader] `c-api`と`additional-libraries`のディレクトリを掘る
Browse files Browse the repository at this point in the history
  • Loading branch information
qryxip committed Jan 25, 2025
1 parent 8112ecb commit 6bfe018
Show file tree
Hide file tree
Showing 8 changed files with 47 additions and 47 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ jobs:
cargo xtask update-c-header --verify
git diff
# TODO: "build-and-test-…"にする
build-unix-cpp-example:
strategy:
fail-fast: false
Expand All @@ -200,10 +201,10 @@ jobs:
run: cargo build -p voicevox_core_c_api --features load-onnxruntime -v
- name: 必要なfileをunix用exampleのディレクトリに移動させる
run: |
mkdir -p example/cpp/unix/voicevox_core/
mkdir -p example/cpp/unix/voicevox_core/c_api
sed 's:^//\(#define VOICEVOX_LOAD_ONNXRUNTIME\)$:\1:' \
crates/voicevox_core_c_api/include/voicevox_core.h \
> example/cpp/unix/voicevox_core/voicevox_core.h
> example/cpp/unix/voicevox_core/c_api/voicevox_core.h
cp -v target/debug/libvoicevox_core.{so,dylib} example/cpp/unix/voicevox_core/ || true
cp -v target/debug/libonnxruntime.so.* example/cpp/unix/voicevox_core/ || true
cp -v target/debug/libonnxruntime.*.dylib example/cpp/unix/voicevox_core/ || true
Expand All @@ -221,6 +222,7 @@ jobs:
cmake -S . -B build
cmake --build build
# TODO: "build-and-test-…"にする
build-windows-cpp-example:
strategy:
fail-fast: false
Expand Down
23 changes: 8 additions & 15 deletions crates/downloader/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -371,15 +371,15 @@ async fn main() -> anyhow::Result<()> {
tasks.spawn(download_and_extract_from_gh(
c_api,
Stripping::FirstDir,
&output,
output.join("c_api"),
&progresses,
)?);
}
if let Some(onnxruntime) = onnxruntime {
tasks.spawn(download_and_extract_from_gh(
onnxruntime,
Stripping::FirstDir,
&output.join("onnxruntime"),
output.join("onnxruntime"),
&progresses,
)?);
}
Expand All @@ -388,23 +388,19 @@ async fn main() -> anyhow::Result<()> {
tasks.spawn(download_and_extract_from_gh(
additional_libraries,
Stripping::FirstDir,
&output,
output.join("additional_libraries"),
&progresses,
)?);
}
}
if let Some(models) = models {
tasks.spawn(download_models(
models,
&output.join("models"),
&progresses,
)?);
tasks.spawn(download_models(models, output.join("models"), &progresses)?);
}
if targets.contains(&DownloadTarget::Dict) {
tasks.spawn(download_and_extract_from_url(
&OPEN_JTALK_DIC_URL,
Stripping::None,
&output,
output.join("dict"),
&progresses,
)?);
}
Expand Down Expand Up @@ -704,10 +700,9 @@ fn download_and_extract_from_gh(
..
}: GhAsset,
stripping: Stripping,
output: &Path,
output: PathBuf,
progresses: &MultiProgress,
) -> anyhow::Result<impl Future<Output = anyhow::Result<()>>> {
let output = output.to_owned();
let archive_kind = ArchiveKind::from_filename(&name)?;
let pb = add_progress_bar(progresses, size as _, name);

Expand All @@ -734,10 +729,9 @@ fn download_and_extract_from_gh(
fn download_and_extract_from_url(
url: &'static Url,
stripping: Stripping,
output: &Path,
output: PathBuf,
progresses: &MultiProgress,
) -> anyhow::Result<impl Future<Output = anyhow::Result<()>>> {
let output = output.to_owned();
let name = url
.path_segments()
.and_then(|s| s.last())
Expand Down Expand Up @@ -769,10 +763,9 @@ fn download_models(
models,
..
}: ModelsWithTerms,
output: &Path,
output: PathBuf,
progresses: &MultiProgress,
) -> anyhow::Result<impl Future<Output = anyhow::Result<()>>> {
let output = output.to_owned();
let reqwest = reqwest::Client::builder().build()?;

let models = models
Expand Down
6 changes: 3 additions & 3 deletions docs/guide/user/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,11 @@ from pprint import pprint
from voicevox_core.blocking import Onnxruntime, OpenJtalk, Synthesizer, VoiceModelFile

# 1. Synthesizerの初期化
open_jtalk_dict_dir = "open_jtalk_dic_utf_8-1.11"
open_jtalk_dict_dir = "dict/open_jtalk_dic_utf_8-1.11"
synthesizer = Synthesizer(Onnxruntime.load_once(), OpenJtalk(open_jtalk_dict_dir))

# 2. 音声モデルの読み込み
with VoiceModelFile.open("model/0.vvm") as model:
with VoiceModelFile.open("models/vvms/0.vvm") as model:
synthesizer.load_voice_model(model)

# 3. テキスト音声合成
Expand All @@ -89,7 +89,7 @@ AIエンジンの`Onnxruntime`のインスタンスと、辞書などを取り
VVM ファイルから`VoiceModelFile`インスタンスを作成し、`Synthesizer`に読み込ませます。その VVM ファイルにどの声が含まれているかは、`VoiceModelFile`の`.metas`や[音声モデルと声の対応表](https://github.com/VOICEVOX/voicevox_fat_resource/blob/main/core/model/README.md#%E9%9F%B3%E5%A3%B0%E3%83%A2%E3%83%87%E3%83%ABvvm%E3%83%95%E3%82%A1%E3%82%A4%E3%83%AB%E3%81%A8%E5%A3%B0%E3%82%AD%E3%83%A3%E3%83%A9%E3%82%AF%E3%82%BF%E3%83%BC%E3%82%B9%E3%82%BF%E3%82%A4%E3%83%AB%E5%90%8D%E3%81%A8%E3%82%B9%E3%82%BF%E3%82%A4%E3%83%AB-id-%E3%81%AE%E5%AF%BE%E5%BF%9C%E8%A1%A8)で確認できます。

```python
with VoiceModelFile.open("model/0.vvm") as model:
with VoiceModelFile.open("models/vvms/0.vvm") as model:
pprint(model.metas)
```

Expand Down
6 changes: 3 additions & 3 deletions example/cpp/unix/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ project(SimpleTTS)
add_executable(simple_tts simple_tts.cpp)
set_property(TARGET simple_tts PROPERTY CXX_STANDARD 17)

file(GLOB ONNXRUNTIME_SHARED_LIB ./libonnxruntime.so.* ./libonnxruntime.*.dylib)
target_link_directories(simple_tts PRIVATE ./voicevox_core)
file(GLOB ONNXRUNTIME_SHARED_LIB ./libonnxruntime.so.* ./libonnxruntime.*.dylib) # TODO: なにこれ
target_link_directories(simple_tts PRIVATE ./voicevox_core/c_api/)


file(GLOB CORE_LIB ./voicevox_core/libvoicevox_core.so.* ./voicevox_core/libvoicevox_core.*.dylib)
file(GLOB CORE_LIB ./voicevox_core/c_api/libvoicevox_core.so.* ./voicevox_core/c_api/libvoicevox_core.*.dylib) # TODO: なにこれ
target_link_libraries(simple_tts voicevox_core)
10 changes: 7 additions & 3 deletions example/cpp/unix/simple_tts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include <iostream>
#include <string>

#include "voicevox_core/voicevox_core.h"
#include "voicevox_core/c_api/voicevox_core.h"

#define STYLE_ID 0
#define OUTPUT_WAV_NAME "audio.wav"
Expand All @@ -14,14 +14,18 @@ int main(int argc, char *argv[]) {
return 0;
}

std::string open_jtalk_dict_path("voicevox_core/open_jtalk_dic_utf_8-1.11");
std::string open_jtalk_dict_path(
"voicevox_core/dict/open_jtalk_dic_utf_8-1.11");
std::string text(argv[1]);

std::cout << "coreの初期化中..." << std::endl;

auto initialize_options = voicevox_make_default_initialize_options();
const VoicevoxOnnxruntime* onnxruntime;
auto load_ort_options = voicevox_make_default_load_onnxruntime_options();
std::string ort_filename = "./voicevox_core/onnxruntime/lib/";
ort_filename += voicevox_get_onnxruntime_lib_versioned_filename();
load_ort_options.filename = ort_filename.c_str();
auto result = voicevox_onnxruntime_load_once(load_ort_options, &onnxruntime);
if (result != VOICEVOX_RESULT_OK){
std::cerr << voicevox_error_result_to_message(result) << std::endl;
Expand All @@ -42,7 +46,7 @@ int main(int argc, char *argv[]) {
voicevox_open_jtalk_rc_delete(open_jtalk);

for (auto const& entry :
std::filesystem::directory_iterator{"./voicevox_core/model"}) {
std::filesystem::directory_iterator{"./voicevox_core/models/vvms"}) {
const auto path = entry.path();
if (path.extension() != ".vvm") {
continue;
Expand Down
6 changes: 3 additions & 3 deletions example/cpp/windows/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ Visual Studio Installerを使用しインストールしてください。

- simple_tts\bin\x64\Debug に配置
- voicevox_core.dll
- onnxruntime.dll
- onnxruntime_providers_shared.dll
- modelフォルダ
- onnxruntime.dll (TODO: 要らないはず)
- onnxruntime_providers_shared.dll (TODO: 要らないはず)
- modelフォルダ (TODO: 要らないはず)

- simple_tts\lib\x64 に配置
- voicevox_core.lib
Expand Down
33 changes: 17 additions & 16 deletions example/python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ https://github.com/VOICEVOX/voicevox_core/releases/latest

2. ダウンローダーを使って環境構築します。

FIXME: 今は`--exclude core`がある

linux/mac の場合

download-linux-x64 のところはアーキテクチャや OS によって適宜読み替えてください。
Expand All @@ -29,18 +27,14 @@ https://github.com/VOICEVOX/voicevox_core/releases/latest#%E3%83%80%E3%82%A6%E3%
binary=download-linux-x64
curl -sSfL https://github.com/VOICEVOX/voicevox_core/releases/latest/download/${binary} -o download
chmod +x download
./download -o ./example/python
# いくつかのファイルは不要なので消すことができます
#rm -r ./example/python/{model,VERSION,*voicevox_core*}
./download -o ./example/python --exclude c-api
```

windows の場合

```console
Invoke-WebRequest https://github.com/VOICEVOX/voicevox_core/releases/latest/download/download-windows-x64.exe -OutFile ./download.exe
./download -o ./example/python
# いくつかのファイルは不要なので消すことができます
#Remove-Item -Recurse ./example/python/model,./example/python/VERSION,./example/python/*voicevox_core*
./download -o ./example/python --exclude c-api
```

TODO:
Expand Down Expand Up @@ -71,16 +65,23 @@ optional arguments:
## 実行例

```console
python ./run.py ../../crates/test_util/data/model/sample.vvm
[DEBUG] __main__: voicevox_core.supported_devices()=SupportedDevices(cpu=True, cuda=False, dml=False)
[INFO] __main__: Initializing (acceleration_mode=<AccelerationMode.AUTO: 'AUTO'>, open_jtalk_dict_dir=PosixPath('open_jtalk_dic_utf_8-1.11'))
[DEBUG] __main__: synthesizer.metas=[]
[DEBUG] __main__: synthesizer.is_gpu_mode=False
[INFO] __main__: Loading `../../crates/test_util/data/model/sample.vvm`
python ./run.py ./models/vvms/0.vvm
[INFO] __main__: Loading ONNX Runtime (args.onnxruntime='./onnxruntime/lib/libvoicevox_onnxruntime.so.1.17.3')
[DEBUG] __main__: onnxruntime.supported_devices()=SupportedDevices(cpu=True, cuda=True, dml=False)
[INFO] __main__: Initializing (args.mode=<AccelerationMode.AUTO: 'AUTO'>, args.dict_dir=PosixPath('dict/open_jtalk_dic_utf_8-1.11'))
[INFO] voicevox_core.synthesizer: GPUをテストします:
[INFO] voicevox_core.synthesizer: * CUDA (device_id=0): OK
[INFO] voicevox_core.synthesizer: * DirectML (device_id=0): 現在ロードされているONNX Runtimeでは利用できません
[INFO] voicevox_core.synthesizer: CUDA (device_id=0)を利用します
[DEBUG] __main__: synthesizer.metas()=[]
[DEBUG] __main__: synthesizer.is_gpu_mode=True
[INFO] __main__: Loading `models/vvms/0.vvm`
[WARNING] ort.environment: Some nodes were not assigned to the preferred execution providers which may or may not have an negative impact on performance. e.g. ORT explicitly assigns shape related ops to CPU to improve perf.
[DEBUG] voicevox_core_python_api: Closing a VoiceModelFile
[INFO] __main__: Creating an AudioQuery from 'この音声は、ボイスボックスを使用して、出力されています。'
[INFO] __main__: Synthesizing with {"accent_phrases": [{"moras": [{"text": "コ", "consonant": "k", "consonant_length": 0.0556899, "vowel": "o", "vowel_length": 0.075180575, "pitch": 5.542309}, {"text": "ノ", "consonant": "n", "consonant_length": 0.06551014, "vowel": "o", "vowel_length": 0.09984577, "pitch": 5.6173983}], "accent": 2, "pause_mora": null, "is_interrogative": false}, {"moras": [{"text": "オ", "consonant": null, "consonant_length": null, "vowel": "o", "vowel_length": 0.116150305, "pitch": 5.7063766}, {"text": "ン", "consonant": null, "consonant_length": null, "vowel": "N", "vowel_length": 0.044380233, "pitch": 5.785717}, {"text": "セ", "consonant": "s", "consonant_length": 0.07719758, "vowel": "e", "vowel_length": 0.08653869, "pitch": 5.662092}, {"text": "エ", "consonant": null, "consonant_length": null, "vowel": "e", "vowel_length": 0.08311573, "pitch": 5.532917}, {"text": "ワ", "consonant": "w", "consonant_length": 0.06373148, "vowel": "a", "vowel_length": 0.16219379, "pitch": 5.293258}], "accent": 1, "pause_mora": {"text": "、", "consonant": null, "consonant_length": null, "vowel": "pau", "vowel_length": 0.35826492, "pitch": 0.0}, "is_interrogative": false}, {"moras": [{"text": "ボ", "consonant": "b", "consonant_length": 0.047082342, "vowel": "o", "vowel_length": 0.12611786, "pitch": 5.583892}, {"text": "イ", "consonant": null, "consonant_length": null, "vowel": "i", "vowel_length": 0.059451744, "pitch": 5.7947493}, {"text": "ス", "consonant": "s", "consonant_length": 0.089278996, "vowel": "u", "vowel_length": 0.11847979, "pitch": 5.818695}, {"text": "ボ", "consonant": "b", "consonant_length": 0.06535433, "vowel": "o", "vowel_length": 0.120458946, "pitch": 5.7965107}, {"text": "ッ", "consonant": null, "consonant_length": null, "vowel": "cl", "vowel_length": 0.06940381, "pitch": 0.0}, {"text": "ク", "consonant": "k", "consonant_length": 0.053739145, "vowel": "U", "vowel_length": 0.05395376, "pitch": 0.0}, {"text": "ス", "consonant": "s", "consonant_length": 
0.10222931, "vowel": "u", "vowel_length": 0.071811065, "pitch": 5.8024883}, {"text": "オ", "consonant": null, "consonant_length": null, "vowel": "o", "vowel_length": 0.11092262, "pitch": 5.5036163}], "accent": 4, "pause_mora": null, "is_interrogative": false}, {"moras": [{"text": "シ", "consonant": "sh", "consonant_length": 0.09327768, "vowel": "i", "vowel_length": 0.09126951, "pitch": 5.369444}, {"text": "ヨ", "consonant": "y", "consonant_length": 0.06251812, "vowel": "o", "vowel_length": 0.07805054, "pitch": 5.5021667}, {"text": "オ", "consonant": null, "consonant_length": null, "vowel": "o", "vowel_length": 0.09904325, "pitch": 5.5219536}], "accent": 3, "pause_mora": null, "is_interrogative": false}, {"moras": [{"text": "シ", "consonant": "sh", "consonant_length": 0.04879771, "vowel": "I", "vowel_length": 0.06514315, "pitch": 0.0}, {"text": "テ", "consonant": "t", "consonant_length": 0.0840496, "vowel": "e", "vowel_length": 0.19438823, "pitch": 5.4875555}], "accent": 2, "pause_mora": {"text": "、", "consonant": null, "consonant_length": null, "vowel": "pau", "vowel_length": 0.35208154, "pitch": 0.0}, "is_interrogative": false}, {"moras": [{"text": "シュ", "consonant": "sh", "consonant_length": 0.05436731, "vowel": "U", "vowel_length": 0.06044446, "pitch": 0.0}, {"text": "ツ", "consonant": "ts", "consonant_length": 0.102865085, "vowel": "u", "vowel_length": 0.057028636, "pitch": 5.6402535}, {"text": "リョ", "consonant": "ry", "consonant_length": 0.058293864, "vowel": "o", "vowel_length": 0.080050275, "pitch": 5.6997967}, {"text": "ク", "consonant": "k", "consonant_length": 0.054767884, "vowel": "U", "vowel_length": 0.042932786, "pitch": 0.0}], "accent": 2, "pause_mora": null, "is_interrogative": false}, {"moras": [{"text": "サ", "consonant": "s", "consonant_length": 0.08067487, "vowel": "a", "vowel_length": 0.07377973, "pitch": 5.652378}, {"text": "レ", "consonant": "r", "consonant_length": 0.040600352, "vowel": "e", "vowel_length": 0.079322875, "pitch": 5.6290326}, {"text": 
"テ", "consonant": "t", "consonant_length": 0.06773268, "vowel": "e", "vowel_length": 0.08347456, "pitch": 5.6427326}], "accent": 3, "pause_mora": null, "is_interrogative": false}, {"moras": [{"text": "イ", "consonant": null, "consonant_length": null, "vowel": "i", "vowel_length": 0.07542324, "pitch": 5.641289}, {"text": "マ", "consonant": "m", "consonant_length": 0.066299975, "vowel": "a", "vowel_length": 0.107257664, "pitch": 5.6201453}, {"text": "ス", "consonant": "s", "consonant_length": 0.07186453, "vowel": "U", "vowel_length": 0.1163103, "pitch": 0.0}], "accent": 2, "pause_mora": null, "is_interrogative": false}], "speed_scale": 1.0, "pitch_scale": 0.0, "intonation_scale": 1.0, "volume_scale": 1.0, "pre_phoneme_length": 0.1, "post_phoneme_length": 0.1, "output_sampling_rate": 24000, "output_stereo": false, "kana": "コノ'/オ'ンセエワ、ボイスボ'ッ_クスオ/シヨオ'/_シテ'、_シュツ' リョ_ク/サレテ'/イマ'_ス"}
[INFO] __main__: Synthesizing with {"accent_phrases": […], "speed_scale": 1.0, "pitch_scale": 0.0, "intonation_scale": 1.0, "volume_scale": 1.0, "pre_phoneme_length": 0.1, "post_phoneme_length": 0.1, "output_sampling_rate": 24000, "output_stereo": false, "pause_length": null, "pause_length_scale": 1.0, "kana": "コノ'/オ'ンセエワ、ボイスボ'ッ_クスオ/シヨオ'/_シテ'、_シュツ'リョ_ク/サレテ'/イマ'_ス"}
[INFO] __main__: Wrote `output.wav`
[DEBUG] voicevox_core_python_api: Destructing a VoicevoxCore
[WARNING] voicevox_core_python_api: デストラクタにより`Synthesizer`のクローズを行います。通常は、可能な限り`__exit__`でクローズするようにして下さい
```

正常に実行されれば音声合成の結果である wav ファイルが生成されます。
Expand Down
4 changes: 2 additions & 2 deletions example/python/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,12 @@ def parse_args() -> "Args":
)
argparser.add_argument(
"--onnxruntime",
default=Onnxruntime.LIB_VERSIONED_FILENAME,
default=f"./onnxruntime/lib/{Onnxruntime.LIB_VERSIONED_FILENAME}",
help="ONNX Runtimeのライブラリのfilename",
)
argparser.add_argument(
"--dict-dir",
default="./open_jtalk_dic_utf_8-1.11",
default="./dict/open_jtalk_dic_utf_8-1.11",
type=Path,
help="Open JTalkの辞書ディレクトリ",
)
Expand Down

0 comments on commit 6bfe018

Please sign in to comment.