forked from dgaddy/silent_speech
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path: asr.py
28 lines (26 loc) · 1.04 KB
/
asr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import os
import logging
import deepspeech
import jiwer
import soundfile as sf
import numpy as np
from unidecode import unidecode
def evaluate(testset, audio_directory):
    """Transcribe generated audio with DeepSpeech and log the word error rate.

    For the i-th datapoint of ``testset`` the file ``example_output_{i}.wav``
    is read from ``audio_directory``, converted to 16-bit integer samples and
    transcribed with the DeepSpeech 0.7.0 model. The transcriptions are
    compared with the datapoints' reference texts after both sides have had
    punctuation removed and been lower-cased.

    Args:
        testset: Iterable of datapoints. Each datapoint must support
            ``datapoint['text']``, which yields the reference transcription
            (it is passed through ``unidecode`` before scoring).
        audio_directory: Directory containing one ``example_output_{i}.wav``
            per datapoint, where ``i`` is the datapoint's position in
            ``testset``.

    Returns:
        None. The normalized targets, the normalized predictions and the
        word error rate are reported through ``logging.info``.

    Raises:
        AssertionError: If a file's sample rate differs from
            ``model.sampleRate()``.
    """
    # Model and scorer paths are relative, so they are resolved against the
    # current working directory.
    model = deepspeech.Model('deepspeech-0.7.0-models.pbmm')
    model.enableExternalScorer('deepspeech-0.7.0-models.scorer')
    expected_rate = model.sampleRate()  # loop-invariant, query it once

    int16_min = -(2**15)
    int16_max = 2**15 - 1

    predictions = []
    targets = []
    for i, datapoint in enumerate(testset):
        audio, rate = sf.read(os.path.join(audio_directory, f'example_output_{i}.wav'))
        # Raised explicitly (instead of using `assert`) so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        if rate != expected_rate:
            raise AssertionError('wrong sample rate')

        # Assumes sf.read returned float samples nominally in [-1.0, 1.0)
        # (its default) -- TODO confirm for non-default callers.
        # A sample at or beyond full scale (e.g. exactly 1.0) scales to a value
        # outside the int16 range, so clip before casting to avoid wrap-around.
        scaled = np.clip(audio * (2**15), int16_min, int16_max)
        audio_int16 = scaled.astype(np.int16)

        predictions.append(model.stt(audio_int16))
        targets.append(unidecode(datapoint['text']))

    # Apply the same normalization to references and hypotheses so that case
    # and punctuation differences do not count as word errors.
    transformation = jiwer.Compose([jiwer.RemovePunctuation(), jiwer.ToLowerCase()])
    targets = transformation(targets)
    predictions = transformation(predictions)

    logging.info(f'targets: {targets}')
    logging.info(f'predictions: {predictions}')
    logging.info(f'wer: {jiwer.wer(targets, predictions)}')