Commit: Bug fixes (#712)
mmcauliffe authored Nov 2, 2023
1 parent f64192d commit 8ef1aba
Showing 7 changed files with 29 additions and 65 deletions.
7 changes: 7 additions & 0 deletions docs/source/changelog/changelog_3.0.rst
@@ -5,8 +5,15 @@
 3.0 Changelog
 *************
 
+3.0.0a8
+=======
+
+- Fixed an issue where utterance and speaker xvectors from speechbrain were not length-normalized
+- Bug fixes for integration with Anchor
+
 3.0.0a7
 =======
 
 - Fixed an issue where using relative paths could delete all MFA temporary files with :code:`--clean`
 - Fixed an issue where "<eps>" in a transcript to force silence was inserting phones for OOVs rather than silence

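Note on the :code:`--clean` fix above: a relative root path resolves against whatever the current working directory happens to be, so a cleanup pass can end up targeting the wrong tree. A minimal sketch of the kind of guard that bullet implies, with hypothetical names (`safe_clean` and `expected_root` are illustrative, not MFA's actual API):

```python
import shutil
from pathlib import Path


def safe_clean(temp_dir: Path, expected_root: Path) -> None:
    # Resolve to absolute paths first; a relative path depends on the
    # current working directory and may point outside the intended root.
    temp_dir = temp_dir.resolve()
    expected_root = expected_root.resolve()
    if temp_dir != expected_root and expected_root not in temp_dir.parents:
        raise ValueError(f"Refusing to delete {temp_dir}: not under {expected_root}")
    shutil.rmtree(temp_dir, ignore_errors=True)
```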
2 changes: 1 addition & 1 deletion docs/source/first_steps/tutorials.rst
@@ -1,7 +1,7 @@
 
 .. _`filing an issue`: https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner/issues
 
-.. _`Montreal Forced Aligner v2 Corpus Phonetics Tutorial`: https://www.eleanorchodroff.com/tutorial/montreal-forced-aligner-v2.html
+.. _`Montreal Forced Aligner v2 Corpus Phonetics Tutorial`: https://eleanorchodroff.com/tutorial/montreal-forced-aligner.html
 
 .. _`Phonetic forced alignment with the Montreal Forced Aligner`: https://www.youtube.com/watch?v=Zhj-ccMDj_w
 
14 changes: 8 additions & 6 deletions montreal_forced_aligner/corpus/base.py
@@ -540,7 +540,6 @@ def initialize_jobs(self) -> None:
             bulk_update(session, Utterance, update_mappings, id_field="speaker_id")
             session.commit()
             if session.query(Dictionary2Job).count() == 0:
-
                 dict_job_mappings = []
                 for job_id, dict_id in (
                     session.query(Utterance.job_id, Dictionary.id)
@@ -675,11 +674,11 @@ def normalize_text(self) -> None:
             has_words = (
                 session.query(Dictionary).filter(Dictionary.name == "unknown").first() is None
             )
+            existing_oovs = set()
             words = session.query(
-                Word.id, Word.mapping_id, Word.dictionary_id, Word.word
+                Word.id, Word.mapping_id, Word.dictionary_id, Word.word, Word.word_type
             ).order_by(Word.mapping_id)
             if not has_words or getattr(self, "use_g2p", False):
-
                 word_insert_mappings["<eps>"] = {
                     "id": word_key,
                     "word": "<eps>",
@@ -690,7 +689,10 @@ def normalize_text(self) -> None:
                 }
                 word_key += 1
                 max_mapping_ids[1] = word_key - 1
-            for w_id, m_id, d_id, w in words:
+            for w_id, m_id, d_id, w, wt in words:
+                if wt is WordType.oov:
+                    existing_oovs.add(w)
+                    continue
                 word_indexes[(d_id, w)] = w_id
                 word_mapping_ids[(d_id, w)] = m_id
                 max_mapping_ids[d_id] = m_id
@@ -818,6 +820,8 @@ def normalize_text(self) -> None:
                         word_key += 1
                 else:
                     for word in to_g2p:
+                        if word in existing_oovs:
+                            continue
                         if word not in word_insert_mappings:
                             word_insert_mappings[word] = {
                                 "id": word_key,
@@ -1151,7 +1155,6 @@ def create_subset(self, subset: int) -> None:
                 logger.debug(f"For {dict_id}, total number of utterances is {num_utts}")
                 larger_subset_num = int(subset_per_dictionary * 10)
                 if num_utts > larger_subset_num:
-
                     larger_subset_query = (
                         session.query(Utterance.id)
                         .join(Utterance.speaker)
@@ -1182,7 +1185,6 @@ def create_subset(self, subset: int) -> None:
                     session.execute(query)
                 logger.debug(f"For {dict_id}, subset is {subset_per_dictionary}")
                 elif num_utts > subset_per_dictionary:
-
                     larger_subset_query = (
                         session.query(Utterance.id)
                         .join(Utterance.speaker)
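The normalize_text changes above add bookkeeping so that words already stored with an OOV word type are neither re-indexed for lookup nor re-queued for insertion. A toy illustration of that pattern, with plain tuples and strings standing in for the SQLAlchemy rows and the `WordType` enum used in the real code:

```python
# Rows standing in for session.query(Word.id, Word.mapping_id,
# Word.dictionary_id, Word.word, Word.word_type).
rows = [
    (1, 1, 1, "the", "speech"),
    (2, 2, 1, "zzyzx", "oov"),  # already registered as an OOV
]

existing_oovs = set()
word_indexes = {}
for w_id, m_id, d_id, w, wt in rows:
    if wt == "oov":
        existing_oovs.add(w)  # remember it, but keep it out of the lookup tables
        continue
    word_indexes[(d_id, w)] = w_id

# Later, only genuinely new words are queued for insertion.
to_g2p = {"zzyzx", "flibbertigibbet"}
pending = sorted(w for w in to_g2p if w not in existing_oovs)
print(pending)  # ['flibbertigibbet']
```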
1 change: 1 addition & 0 deletions montreal_forced_aligner/corpus/ivector_corpus.py
@@ -248,6 +248,7 @@ def compute_plda(self, minimum_utterance_count: int = 1) -> None:
                     pbar.update(1)
                     vector = DoubleVector()
                     vector.from_numpy(ivector)
+                    ivector_normalize_length(vector)
                     ivector_mat.Row(i).CopyFromVec(vector)
                     num_utt_done += 1
                 update_mapping.append({"id": speaker_id, "num_utterances": num_utterances})
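The added `ivector_normalize_length(vector)` call applies Kaldi-style length normalization before the vector enters the PLDA accumulation. As I understand Kaldi's convention, this rescales a vector so its L2 norm equals the square root of its dimension; a rough numpy sketch under that assumption:

```python
import numpy as np


def normalize_length(vec: np.ndarray) -> np.ndarray:
    # Rescale so that ||vec|| == sqrt(dim), the scale PLDA training expects
    # (assumed equivalent of _kalpy.ivector.ivector_normalize_length).
    return vec * (np.sqrt(vec.shape[0]) / np.linalg.norm(vec))


x = np.random.default_rng(0).normal(size=192)  # dimension 192 is illustrative
y = normalize_length(x)
print(np.isclose(np.linalg.norm(y), np.sqrt(192)))  # True
```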
7 changes: 5 additions & 2 deletions montreal_forced_aligner/db.py
@@ -1276,7 +1276,11 @@ def aligned_phone_intervals(self) -> typing.List[CtmInterval]:
         """
         Phone intervals from :attr:`montreal_forced_aligner.data.WorkflowType.alignment`
         """
-        return [x.as_ctm() for x in self.phone_intervals]
+        return [
+            x.as_ctm()
+            for x in self.phone_intervals
+            if x.workflow.workflow_type in [WorkflowType.alignment, WorkflowType.online_alignment]
+        ]
 
     @property
     def aligned_word_intervals(self) -> typing.List[CtmInterval]:
@@ -1760,7 +1764,6 @@ def dictionary_ids(self) -> typing.List[int]:
     def construct_feature_archive(
         self, working_directory: Path, dictionary_id: typing.Optional[int] = None, **kwargs
     ):
-
         fmllr_path = self.construct_path(
             self.corpus.current_subset_directory, "trans", "scp", dictionary_id
         )
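The `aligned_phone_intervals` change filters intervals by the workflow that produced them, so phones from, say, a transcription workflow no longer leak into alignment output. A self-contained sketch of that filtering, with toy classes in place of the ORM models (the real code reaches the workflow type through `x.workflow.workflow_type`):

```python
from dataclasses import dataclass
from enum import Enum, auto


class WorkflowType(Enum):
    alignment = auto()
    online_alignment = auto()
    transcription = auto()


@dataclass
class PhoneInterval:
    label: str
    workflow_type: WorkflowType


intervals = [
    PhoneInterval("AH", WorkflowType.alignment),
    PhoneInterval("AH", WorkflowType.transcription),  # must not appear in alignment output
]
aligned = [
    x
    for x in intervals
    if x.workflow_type in (WorkflowType.alignment, WorkflowType.online_alignment)
]
print([x.workflow_type.name for x in aligned])  # ['alignment']
```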
61 changes: 6 additions & 55 deletions montreal_forced_aligner/diarization/speaker_diarizer.py
@@ -22,7 +22,7 @@
 import sqlalchemy
 import yaml
 from _kalpy.ivector import Plda, ivector_normalize_length
-from _kalpy.matrix import FloatVector
+from _kalpy.matrix import DoubleVector, FloatVector
 from kalpy.utils import read_kaldi_object
 from sklearn import metrics
 from sqlalchemy.orm import joinedload, selectinload
@@ -411,7 +411,6 @@ def classify_speakers(self):
 
     def map_speakers_to_ground_truth(self):
         with self.session() as session:
-
             utterances = session.query(Utterance.id, Utterance.speaker_id)
             labels = []
             utterance_ids = []
@@ -441,7 +440,6 @@ def evaluate_clustering(self) -> None:
         with self.session() as session, mfa_open(
             self.working_directory.joinpath("diarization_evaluation_results.csv"), "w"
         ) as f:
-
             writer = csv.DictWriter(
                 f,
                 fieldnames=[
@@ -805,7 +803,6 @@ def classify_iteration(self, iteration=None) -> None:
         with self.session() as session, tqdm(
             total=self.num_utterances, disable=config.QUIET
         ) as pbar:
-
             unknown_speaker_id = (
                 session.query(Speaker.id).filter(Speaker.name == "MFA_UNKNOWN").first()[0]
             )
@@ -1317,7 +1314,6 @@ def load_embeddings(self) -> None:
         begin = time.time()
         update_mapping = {}
         arguments = [SpeechbrainArguments(1, self.session, None, self.cuda, self.cluster)]
-        embeddings = []
         utterance_ids = []
         original_use_mp = config.USE_MP
         if self.cuda:
@@ -1327,8 +1323,11 @@ def load_embeddings(self) -> None:
                 SpeechbrainEmbeddingFunction, arguments, pbar.update
             ):
                 utterance_ids.append(u_id)
-                embeddings.append(emb)
-                update_mapping[u_id] = {"id": u_id, "xvector": emb}
+
+                kaldi_ivector = DoubleVector()
+                kaldi_ivector.from_numpy(emb)
+                ivector_normalize_length(kaldi_ivector)
+                update_mapping[u_id] = {"id": u_id, "xvector": kaldi_ivector.numpy()}
                 if len(update_mapping) >= batch_size:
                     bulk_update(session, Utterance, list(update_mapping.values()))
                     session.commit()
@@ -1404,53 +1403,6 @@ def refresh_plda_vectors(self):
             session.execute(sqlalchemy.text("VACUUM ANALYZE Utterance"))
         logger.debug(f"Refreshing utterance PLDA vectors took {time.time() - begin:.3f} seconds.")
 
-    def refresh_speaker_plda_vectors(self):
-
-        logger.info("Refreshing speaker PLDA vectors...")
-        begin = time.time()
-        self.create_new_current_workflow(WorkflowType.speaker_diarization)
-        self.plda = read_kaldi_object(Plda, self.plda_path)
-        with self.session() as session:
-            session.execute(sqlalchemy.text("DROP INDEX IF EXISTS speaker_plda_vector_index;"))
-            session.commit()
-            c = session.query(Corpus).first()
-            c.plda_calculated = False
-            update_mapping = []
-            speakers = (
-                session.query(Speaker.id, c.speaker_ivector_column, sqlalchemy.func.count())
-                .join(Utterance.speaker)
-                .filter(c.speaker_ivector_column != None)  # noqa
-                .group_by(Speaker.id)
-            )
-            with tqdm(total=self.num_speakers, disable=config.QUIET) as pbar:
-                for s_id, ivector, utt_count in speakers:
-                    kaldi_ivector = FloatVector()
-                    kaldi_ivector.from_numpy(ivector)
-                    pbar.update(1)
-                    update_mapping.append(
-                        {
-                            "id": s_id,
-                            "plda_vector": self.plda.transform_ivector(
-                                kaldi_ivector, utt_count
-                            ).numpy(),
-                        }
-                    )
-
-            bulk_update(session, Speaker, update_mapping)
-            c.plda_calculated = True
-            session.flush()
-            n_lists = int(math.sqrt(self.num_speakers))
-            session.execute(
-                sqlalchemy.text(
-                    f"CREATE INDEX IF NOT EXISTS speaker_plda_vector_index ON speaker USING ivfflat (plda_vector vector_cosine_ops) WITH (lists = {n_lists});"
-                )
-            )
-            session.commit()
-        autocommit_engine = self.db_engine.execution_options(isolation_level="AUTOCOMMIT")
-        with sqlalchemy.orm.Session(autocommit_engine) as session:
-            session.execute(sqlalchemy.text("VACUUM ANALYZE Speaker"))
-        logger.debug(f"Refreshing speaker PLDA vectors took {time.time() - begin:.3f} seconds.")
-
     def refresh_speaker_vectors(self) -> None:
         """Refresh speaker vectors following clustering or classification"""
         begin = time.time()
@@ -1585,7 +1537,6 @@ def export_files(self, output_directory: str) -> None:
                 Dumper=yaml.Dumper,
             )
         with self.session() as session:
-
             logger.info("Writing output files...")
             files = session.query(File).options(
                 selectinload(File.utterances),
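Besides normalizing each speechbrain embedding before it is stored, the `load_embeddings` loop flushes its accumulated rows to the database in batches: rows collect in `update_mapping` and are written with `bulk_update` once `batch_size` is reached. A generic sketch of that flush pattern, with a stand-in `flush` function rather than MFA's actual `bulk_update` call:

```python
from typing import Dict, List

BATCH_SIZE = 3  # illustrative; the real batch size comes from configuration


def flush(rows: List[dict]) -> None:
    # Stand-in for bulk_update(session, Utterance, rows) followed by a commit.
    print(f"flushed {len(rows)} rows")


update_mapping: Dict[int, dict] = {}
for u_id in range(7):
    update_mapping[u_id] = {"id": u_id, "xvector": [0.0]}
    if len(update_mapping) >= BATCH_SIZE:
        flush(list(update_mapping.values()))
        update_mapping = {}
if update_mapping:  # final partial batch
    flush(list(update_mapping.values()))
```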
2 changes: 1 addition & 1 deletion montreal_forced_aligner/utils.py
@@ -259,7 +259,7 @@ def get_mfa_version() -> str:
     try:
         from ._version import version as __version__  # noqa
     except ImportError:
-        __version__ = "2.0.0"
+        __version__ = "3.0.0"
     return __version__
 
 
