-
Notifications
You must be signed in to change notification settings - Fork 537
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
convert wespeaker models to sherpa-onnx (#475)
- Loading branch information
1 parent
0e23f82
commit aef74c5
Showing
5 changed files
with
506 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
# Manually triggered workflow that downloads the wespeaker ONNX models,
# adds sherpa-onnx meta data to them via scripts/wespeaker/run.sh, and
# uploads the resulting *.onnx files to a GitHub release.
name: export-wespeaker-to-onnx

on:
  workflow_dispatch:

# Only one export per ref runs at a time; a newer run cancels the older one.
concurrency:
  group: export-wespeaker-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-wespeaker-to-onnx:
    # Skip the job on forks other than the two listed owners.
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export wespeaker
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.8"]

    steps:
      - uses: actions/checkout@v4

      # setup-python@v2 targets the deprecated Node 12 runtime; v5 accepts
      # the same `python-version` input.
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Python dependencies
        shell: bash
        run: |
          pip install kaldi-native-fbank numpy onnx onnxruntime

      - name: Run
        shell: bash
        run: |
          cd scripts/wespeaker
          ./run.sh
          mv -v *.onnx ../..

      # The tag below must match the name of the existing release exactly,
      # so its spelling is intentionally left as is.
      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.onnx
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: speaker-recongition-models
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Introduction | ||
|
||
This folder contains scripts for adding metadata to ONNX models from | ||
https://github.com/wenet-e2e/wespeaker/blob/master/docs/pretrained.md | ||
|
||
You can use the models with metadata in sherpa-onnx. | ||
|
||
|
||
**Caution**: You have to add model meta data to `*.onnx` since we plan | ||
to support models from different frameworks. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
#!/usr/bin/env python3 | ||
# Copyright 2023 Xiaomi Corp. (authors: Fangjun Kuang) | ||
|
||
""" | ||
This script adds meta data to a model so that it can be used in sherpa-onnx. | ||
Usage: | ||
./add_meta_data.py --model ./voxceleb_resnet34.onnx --language English | ||
""" | ||
|
||
import argparse | ||
from pathlib import Path | ||
from typing import Dict | ||
|
||
import onnx | ||
import onnxruntime | ||
|
||
|
||
def get_args(argv=None):
    """Parse the command-line options of this script.

    Args:
      argv:
        Optional list of argument strings to parse. If it is None (the
        default), the arguments are taken from the process command line,
        which is the behavior of the original zero-argument call.
    Returns:
      Return an ``argparse.Namespace`` with the attributes ``model``,
      ``language``, ``url``, ``comment``, and ``sample_rate``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model",
        type=str,
        required=True,
        help="Path to the input onnx model. Example value: model.onnx",
    )

    parser.add_argument(
        "--language",
        type=str,
        required=True,
        help="""Supported language of the input model.
        Example value: Chinese, English.
        """,
    )

    parser.add_argument(
        "--url",
        type=str,
        default="https://github.com/wenet-e2e/wespeaker/blob/master/docs/pretrained.md",
        help="Where the model is downloaded",
    )

    parser.add_argument(
        "--comment",
        type=str,
        default="no comment",
        help="Comment about the model",
    )

    parser.add_argument(
        "--sample-rate",
        type=int,
        default=16000,
        help="Sample rate expected by the model",
    )

    # parse_args(None) falls back to sys.argv[1:].
    return parser.parse_args(argv)
|
||
|
||
def add_meta_data(filename: str, meta_data: Dict[str, object]):
    """Add meta data to an ONNX model. It is changed in-place.

    If a key is already present in the model's meta data, its value is
    overwritten instead of appending a second entry with the same key, so
    running this function more than once on the same file does not create
    duplicate keys.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs. Every value is converted with ``str()`` before
        it is stored.
    """
    model = onnx.load(filename)

    # Index the entries already stored in the model so that they can be
    # updated in place.
    existing = {prop.key: prop for prop in model.metadata_props}

    for key, value in meta_data.items():
        prop = existing.get(key)
        if prop is None:
            prop = model.metadata_props.add()
            prop.key = key
            existing[key] = prop
        prop.value = str(value)

    onnx.save(model, filename)
|
||
|
||
def get_output_dim(filename) -> int:
    """Return the size of the second axis of the model's only output.

    The model is loaded with onnxruntime, its inputs and outputs are printed,
    and its signature is validated before the dimension is returned.

    Args:
      filename:
        Path to the ONNX model. It is converted with ``str()``, so both
        ``str`` and ``pathlib.Path`` are accepted.
    Returns:
      Return ``shape[1]`` of the single model output.
    Raises:
      ValueError:
        If the model does not have exactly one input and one output, if the
        first three axes of the input are not ("B", "T", 80), or if the
        first axis of the output is not "B".
    """
    filename = str(filename)
    session_opts = onnxruntime.SessionOptions()
    session_opts.log_severity_level = 3  # error level

    # Name the provider explicitly: onnxruntime builds that ship more than
    # the CPU provider require the `providers` argument.
    sess = onnxruntime.InferenceSession(
        filename,
        session_opts,
        providers=["CPUExecutionProvider"],
    )

    inputs = sess.get_inputs()
    outputs = sess.get_outputs()

    for i in inputs:
        print(i)

    print("----------")

    for o in outputs:
        print(o)

    print("----------")

    # Explicit exceptions are used instead of assert so that the checks
    # are still performed when Python runs with -O.
    if len(inputs) != 1:
        raise ValueError(f"Expected exactly 1 input, got {len(inputs)}")
    if len(outputs) != 1:
        raise ValueError(f"Expected exactly 1 output, got {len(outputs)}")

    i = inputs[0]
    o = outputs[0]

    if len(i.shape) < 3 or i.shape[:2] != ["B", "T"] or i.shape[2] != 80:
        raise ValueError(f"Unexpected input shape: {i.shape}")

    if len(o.shape) < 2 or o.shape[0] != "B":
        raise ValueError(f"Unexpected output shape: {o.shape}")

    return o.shape[1]
|
||
|
||
def main():
    """Read the command-line options and write meta data into the model.

    The meta data consists of the framework name, the language, the URL,
    the comment, the sample rate, the output dimension queried from the
    model itself, and the ``normalize_features`` flag. The model file is
    modified in-place.

    Raises:
      ValueError:
        If the model file does not exist, or if ``--language`` or ``--url``
        is an empty string.
    """
    args = get_args()
    model = Path(args.model)
    language = args.language
    url = args.url
    comment = args.comment
    sample_rate = args.sample_rate

    if not model.is_file():
        raise ValueError(f"{model} does not exist")

    # Explicit exceptions are used instead of assert so that the checks
    # are still performed when Python runs with -O.
    if not language:
        raise ValueError("--language must not be empty")

    if not url:
        raise ValueError("--url must not be empty")

    output_dim = get_output_dim(model)

    # all models from wespeaker expect input samples in the range
    # [-32768, 32767]
    normalize_features = 0

    meta_data = {
        "framework": "wespeaker",
        "language": language,
        "url": url,
        "comment": comment,
        "sample_rate": sample_rate,
        "output_dim": output_dim,
        "normalize_features": normalize_features,
    }
    print(meta_data)
    add_meta_data(filename=str(model), meta_data=meta_data)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
#!/usr/bin/env bash
#
# Download the wespeaker ONNX models, add sherpa-onnx meta data to each of
# them with ./add_meta_data.py, run ./test.py on the English models, and
# rename every model with a language prefix (en_ or zh_).

set -ex

echo "Downloading models"
# Clone without fetching LFS objects, then pull only the *.onnx files.
export GIT_LFS_SKIP_SMUDGE=1
git clone https://huggingface.co/openspeech/wespeaker-models
cd wespeaker-models
git lfs pull --include "*.onnx"
ls -lh
cd ..
mv wespeaker-models/*.onnx .
ls -lh

url_prefix=https://wespeaker-1256283475.cos.ap-shanghai.myqcloud.com/models
test_wavs=./wespeaker-models/test_wavs

en_models=(
  voxceleb_resnet34
  voxceleb_resnet34_LM
  voxceleb_resnet152_LM
  voxceleb_resnet221_LM
  voxceleb_resnet293_LM
  voxceleb_CAM++
  voxceleb_CAM++_LM
)

for m in "${en_models[@]}"; do
  ./add_meta_data.py \
    --model "./$m.onnx" \
    --language English \
    --url "$url_prefix/voxceleb/$m.onnx"

  # Two recordings labelled with the same speaker (spk1).
  ./test.py --model "./$m.onnx" \
    --file1 "$test_wavs/00001_spk1.wav" \
    --file2 "$test_wavs/00024_spk1.wav"

  # Two recordings labelled with different speakers (spk1 vs spk2).
  ./test.py --model "./$m.onnx" \
    --file1 "$test_wavs/00001_spk1.wav" \
    --file2 "$test_wavs/00010_spk2.wav"

  mv "$m.onnx" "en_$m.onnx"
done

zh_models=(
  cnceleb_resnet34
  cnceleb_resnet34_LM
)

# The Chinese models only get meta data; ./test.py is not run on them.
for m in "${zh_models[@]}"; do
  ./add_meta_data.py \
    --model "./$m.onnx" \
    --language Chinese \
    --url "$url_prefix/cnceleb/$m.onnx"

  mv "$m.onnx" "zh_$m.onnx"
done

ls -lh
Oops, something went wrong.