From a17574264644b82ec52046e72147005845fca228 Mon Sep 17 00:00:00 2001
From: Alexander Gutkin <agutkin@google.com>
Date: Mon, 16 Dec 2024 01:05:42 +0000
Subject: [PATCH] Internal change

PiperOrigin-RevId: 706520201
---
 protoscribe/corpus/reader/tasks.py    | 4 +++-
 protoscribe/speech/audio_tokenizer.py | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/protoscribe/corpus/reader/tasks.py b/protoscribe/corpus/reader/tasks.py
index 5849d30..563791b 100644
--- a/protoscribe/corpus/reader/tasks.py
+++ b/protoscribe/corpus/reader/tasks.py
@@ -399,6 +399,7 @@ def register(
     speech_normalize_waveform: bool = False,
     speech_keep_waveform: bool = False,
     speech_tokenizer_name_or_path: str | None = None,
+    speech_normalize_embeddings: bool = False,
     is_training: bool = True,
 ) -> str:
   """Registers task from gin scaffolding."""
@@ -453,7 +454,8 @@ def register(
   if speech_tokenizer_name_or_path:
     speech_tokenizer = audio_tokenizer.get_tokenizer(
         model_config_name_or_path=speech_tokenizer_name_or_path,
-        sample_rate=speech_corpus_sample_rate
+        sample_rate=speech_corpus_sample_rate,
+        normalize_embeddings=speech_normalize_embeddings,
     )
 
   task_name = f"{_TASK_NAME_PREFIX}_{task_name}"
diff --git a/protoscribe/speech/audio_tokenizer.py b/protoscribe/speech/audio_tokenizer.py
index daa7ee7..9f35990 100644
--- a/protoscribe/speech/audio_tokenizer.py
+++ b/protoscribe/speech/audio_tokenizer.py
@@ -22,7 +22,8 @@
 def get_tokenizer(
     model_config_name_or_path: str,
     sample_rate: int,
-    has_quantizer: bool = False
+    has_quantizer: bool = False,
+    normalize_embeddings: bool = False,
 ) -> AudioTokenizer | None:
   """Manufactures an instance of audio tokenizer.
 
@@ -32,6 +33,7 @@ def get_tokenizer(
     sample_rate: Sampling rate in Hz.
       has_quantizer: True if the model has quantizer. In this case it should be
         possible to retrieve discrete tokens in addition to the embeddings.
+    normalize_embeddings: If True, normalize embeddings; else use them as is.
 
   Returns:
     Audio tokenizer instance.