Skip to content

Commit

Permalink
update: test bug fix
Browse files Browse the repository at this point in the history
  • Loading branch information
yuantuo666 committed Jul 7, 2024
1 parent 8f73a15 commit ff2fa00
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 4 deletions.
2 changes: 1 addition & 1 deletion preprocessors/Emilia/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ bash env.sh

3. Download the model files from the third-party repositories.
We acknowledge the wonderful work by these excellent developers!
- Source Separation: [UVR-MDX-NET-Inst_HQ_3](https://github.com/TRvlvr/model_repo/releases/tag/all_public_uvr_models)
- Source Separation: [UVR-MDX-NET-Inst_HQ_3.onnx](https://github.com/TRvlvr/model_repo/releases/tag/all_public_uvr_models)
- VAD: [Silero](https://github.com/snakers4/silero-vad)
- Speaker Diarization: [pyannote](https://github.com/pyannote/pyannote-audio)
- ASR: [whisperx-medium](https://github.com/m-bain/whisperX)
Expand Down
6 changes: 5 additions & 1 deletion preprocessors/Emilia/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,8 @@ def main_process(audio_path, save_path=None, audio_name=None):
if not cfg["huggingface_token"].startswith("hf"):
raise ValueError(
"huggingface_token must start with 'hf', check the config file. "
"You can get the token at https://huggingface.co/settings/tokens"
"You can get the token at https://huggingface.co/settings/tokens. "
"Remeber grant access following https://github.com/pyannote/pyannote-audio?tab=readme-ov-file#tldr"
)
dia_pipeline = Pipeline.from_pretrained(
"pyannote/speaker-diarization-3.1",
Expand All @@ -532,6 +533,9 @@ def main_process(audio_path, save_path=None, audio_name=None):
device_name,
compute_type=args.compute_type,
threads=args.threads,
asr_options={
"initial_prompt": "Um, Uh, Ah. Like, you know. I mean, right. Actually. Basically, and right? okay. Alright. Emm. So. Oh. 生于忧患,死于安乐。岂不快哉?当然,嗯,呃,就,这样,那个,哪个,啊,呀,哎呀,哎哟,唉哇,啧,唷,哟,噫!微斯人,吾谁与归?ええと、あの、ま、そう、ええ。äh, hm, so, tja, halt, eigentlich. euh, quoi, bah, ben, tu vois, tu sais, t'sais, eh bien, du coup. genre, comme, style. 응,어,그,음."
},
)

# VAD
Expand Down
11 changes: 9 additions & 2 deletions preprocessors/Emilia/utils/tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,12 @@ def load_cfg(cfg_path):
f"{cfg_path} not found. Please copy, configure, and rename `config.json.example` to `{cfg_path}`."
)
with open(cfg_path, "r") as f:
cfg = json.load(f)
try:
cfg = json.load(f)
except json.decoder.JSONDecodeError as e:
raise TypeError(
"Please finish the `// TODO:` in the `config.json` file before running the script. Check README.md for details."
)
return cfg


Expand Down Expand Up @@ -175,6 +180,8 @@ def check_env(logger):
logger.info(
f"ENV: HF_ENDPOINT = {os.environ['HF_ENDPOINT']}, if downloading slow, try `unset HF_ENDPOINT`"
)
else:
logger.info("ENV: HF_ENDPOINT not set")

hostname = os.popen("hostname").read().strip()
logger.debug(f"HOSTNAME: {hostname}")
Expand Down Expand Up @@ -291,7 +298,7 @@ def calculate_audio_stats(
# iterate over each entry in the JSON to apply all filtering criteria
for idx, entry in enumerate(data):
duration = entry["end"] - entry["start"]
dnsmos = entry["mos"]["dnsmos"]
dnsmos = entry["dnsmos"]
# remove punctuation and spaces
char_count = get_char_count(entry["text"])
if char_count > 0:
Expand Down

0 comments on commit ff2fa00

Please sign in to comment.