update: test bug fix

open-mmlab · Jul 7, 2024 · ff2fa00 · ff2fa00
1 parent 8f73a15
commit ff2fa00
Show file tree

Hide file tree

Showing 3 changed files with 15 additions and 4 deletions.
diff --git a/preprocessors/Emilia/README.md b/preprocessors/Emilia/README.md
@@ -48,7 +48,7 @@ bash env.sh
 
 3. Download the model files from the third-party repositories.
 We acknowledge the wonderful work by these excellent developers!
-- Source Separation: [UVR-MDX-NET-Inst_HQ_3](https://github.com/TRvlvr/model_repo/releases/tag/all_public_uvr_models)
+- Source Separation: [UVR-MDX-NET-Inst_HQ_3.onnx](https://github.com/TRvlvr/model_repo/releases/tag/all_public_uvr_models)
 - VAD: [Silero](https://github.com/snakers4/silero-vad)
 - Speaker Diarization: [pyannote](https://github.com/pyannote/pyannote-audio)
 - ASR: [whisperx-medium](https://github.com/m-bain/whisperX)

diff --git a/preprocessors/Emilia/main.py b/preprocessors/Emilia/main.py
@@ -517,7 +517,8 @@ def main_process(audio_path, save_path=None, audio_name=None):
     if not cfg["huggingface_token"].startswith("hf"):
         raise ValueError(
             "huggingface_token must start with 'hf', check the config file. "
-            "You can get the token at https://huggingface.co/settings/tokens"
+            "You can get the token at https://huggingface.co/settings/tokens. "
+            "Remember to grant access following https://github.com/pyannote/pyannote-audio?tab=readme-ov-file#tldr"
         )
     dia_pipeline = Pipeline.from_pretrained(
         "pyannote/speaker-diarization-3.1",
@@ -532,6 +533,9 @@ def main_process(audio_path, save_path=None, audio_name=None):
         device_name,
         compute_type=args.compute_type,
         threads=args.threads,
+        asr_options={
+            "initial_prompt": "Um, Uh, Ah. Like, you know. I mean, right. Actually. Basically, and right? okay. Alright. Emm. So. Oh. 生于忧患,死于安乐。岂不快哉?当然,嗯,呃,就,这样,那个,哪个,啊,呀,哎呀,哎哟,唉哇,啧,唷,哟,噫!微斯人,吾谁与归?ええと、あの、ま、そう、ええ。äh, hm, so, tja, halt, eigentlich. euh, quoi, bah, ben, tu vois, tu sais, t'sais, eh bien, du coup. genre, comme, style. 응,어,그,음."
+        },
     )
 
     # VAD

diff --git a/preprocessors/Emilia/utils/tool.py b/preprocessors/Emilia/utils/tool.py
@@ -35,7 +35,12 @@ def load_cfg(cfg_path):
             f"{cfg_path} not found. Please copy, configure, and rename `config.json.example` to `{cfg_path}`."
         )
     with open(cfg_path, "r") as f:
-        cfg = json.load(f)
+        try:
+            cfg = json.load(f)
+        except json.decoder.JSONDecodeError as e:
+            raise TypeError(
+                "Please finish the `// TODO:` in the `config.json` file before running the script. Check README.md for details."
+            )
     return cfg
 
 
@@ -175,6 +180,8 @@ def check_env(logger):
         logger.info(
             f"ENV: HF_ENDPOINT = {os.environ['HF_ENDPOINT']}, if downloading slow, try `unset HF_ENDPOINT`"
         )
+    else:
+        logger.info("ENV: HF_ENDPOINT not set")
 
     hostname = os.popen("hostname").read().strip()
     logger.debug(f"HOSTNAME: {hostname}")
@@ -291,7 +298,7 @@ def calculate_audio_stats(
     # iterate over each entry in the JSON to apply all filtering criteria
     for idx, entry in enumerate(data):
         duration = entry["end"] - entry["start"]
-        dnsmos = entry["mos"]["dnsmos"]
+        dnsmos = entry["dnsmos"]
         # remove punctuation and spaces
         char_count = get_char_count(entry["text"])
         if char_count > 0: