diff --git a/lib/tasks/gather.js b/lib/tasks/gather.js index eb12cb67b..d3dc13995 100644 --- a/lib/tasks/gather.js +++ b/lib/tasks/gather.js @@ -590,7 +590,8 @@ class TaskGather extends SttTask { return; } - evt = this.normalizeTranscription(evt, this.vendor, 1, this.language, this.shortUtterance); + evt = this.normalizeTranscription(evt, this.vendor, 1, this.language, + this.shortUtterance, this.data.recognizer.punctuation); if (evt.alternatives.length === 0) { this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, continue listening'); return; diff --git a/lib/tasks/transcribe.js b/lib/tasks/transcribe.js index fc8529841..8f9b2892d 100644 --- a/lib/tasks/transcribe.js +++ b/lib/tasks/transcribe.js @@ -305,7 +305,8 @@ class TaskTranscribe extends SttTask { } this.logger.debug({evt}, 'TaskTranscribe:_onTranscription - before normalization'); - evt = this.normalizeTranscription(evt, this.vendor, channel, this.language); + evt = this.normalizeTranscription(evt, this.vendor, channel, this.language, undefined, + this.data.recognizer.punctuation); this.logger.debug({evt}, 'TaskTranscribe:_onTranscription'); if (evt.alternatives.length === 0) { this.logger.info({evt}, 'TaskTranscribe:_onTranscription - got empty transcript, continue listening'); diff --git a/lib/utils/transcription-utils.js b/lib/utils/transcription-utils.js index fa1934748..b5659540d 100644 --- a/lib/utils/transcription-utils.js +++ b/lib/utils/transcription-utils.js @@ -338,14 +338,15 @@ const normalizeNuance = (evt, channel, language) => { }; }; -const normalizeMicrosoft = (evt, channel, language) => { +const normalizeMicrosoft = (evt, channel, language, punctuation) => { const copy = JSON.parse(JSON.stringify(evt)); const nbest = evt.NBest; const language_code = evt.PrimaryLanguage?.Language || language; const alternatives = nbest ? nbest.map((n) => { return { confidence: n.Confidence, - transcript: n.Display + // remove all punctuation if needed + transcript: punctuation ? 
n.Display : n.Display.replace(/\p{P}/gu, '') }; }) : [ @@ -400,14 +401,14 @@ const normalizeAssemblyAi = (evt, channel, language) => { }; module.exports = (logger) => { - const normalizeTranscription = (evt, vendor, channel, language, shortUtterance) => { + const normalizeTranscription = (evt, vendor, channel, language, shortUtterance, punctuation) => { //logger.debug({ evt, vendor, channel, language }, 'normalizeTranscription'); switch (vendor) { case 'deepgram': return normalizeDeepgram(evt, channel, language, shortUtterance); case 'microsoft': - return normalizeMicrosoft(evt, channel, language); + return normalizeMicrosoft(evt, channel, language, punctuation); case 'google': return normalizeGoogle(evt, channel, language); case 'aws':