From a17574264644b82ec52046e72147005845fca228 Mon Sep 17 00:00:00 2001 From: Alexander Gutkin Date: Mon, 16 Dec 2024 01:05:42 +0000 Subject: [PATCH] Internal change PiperOrigin-RevId: 706520201 --- protoscribe/corpus/reader/tasks.py | 4 +++- protoscribe/speech/audio_tokenizer.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/protoscribe/corpus/reader/tasks.py b/protoscribe/corpus/reader/tasks.py index 5849d30..563791b 100644 --- a/protoscribe/corpus/reader/tasks.py +++ b/protoscribe/corpus/reader/tasks.py @@ -399,6 +399,7 @@ def register( speech_normalize_waveform: bool = False, speech_keep_waveform: bool = False, speech_tokenizer_name_or_path: str | None = None, + speech_normalize_embeddings: bool = False, is_training: bool = True, ) -> str: """Registers task from gin scaffolding.""" @@ -453,7 +454,8 @@ def register( if speech_tokenizer_name_or_path: speech_tokenizer = audio_tokenizer.get_tokenizer( model_config_name_or_path=speech_tokenizer_name_or_path, - sample_rate=speech_corpus_sample_rate + sample_rate=speech_corpus_sample_rate, + normalize_embeddings=speech_normalize_embeddings, ) task_name = f"{_TASK_NAME_PREFIX}_{task_name}" diff --git a/protoscribe/speech/audio_tokenizer.py b/protoscribe/speech/audio_tokenizer.py index daa7ee7..9f35990 100644 --- a/protoscribe/speech/audio_tokenizer.py +++ b/protoscribe/speech/audio_tokenizer.py @@ -22,7 +22,8 @@ def get_tokenizer( model_config_name_or_path: str, sample_rate: int, - has_quantizer: bool = False + has_quantizer: bool = False, + normalize_embeddings: bool = False, ) -> AudioTokenizer | None: """Manufactures an instance of audio tokenizer. @@ -32,6 +33,7 @@ def get_tokenizer( sample_rate: Sampling rate in Hz. has_quantizer: True if the model has quantizer. In this case it should be possible to retrieve discrete tokens in addition to the embeddings. + normalize_embeddings: Use embeddings as is (default) or normalize them. Returns: Audio tokenizer instance.