diff --git a/lib/tasks/dial.js b/lib/tasks/dial.js
index f6a66102..d650c08a 100644
--- a/lib/tasks/dial.js
+++ b/lib/tasks/dial.js
@@ -110,6 +110,7 @@ class TaskDial extends Task {
     this.tag = this.data.tag;
     this.boostAudioSignal = this.data.boostAudioSignal;
     this._mediaPath = MediaPath.FullMedia;
+    this.translate = this.data.translate;
 
     if (this.dtmfHook) {
       const {parentDtmfCollector, childDtmfCollector} = parseDtmfOptions(logger, this.data.dtmfCapture || {});
@@ -158,6 +159,7 @@ class TaskDial extends Task {
 
   get canReleaseMedia() {
     const keepAnchor = this.data.anchorMedia ||
+      this.data.translate ||
       this.cs.isBackGroundListen ||
       this.cs.onHoldMusic ||
       ANCHOR_MEDIA_ALWAYS ||
@@ -291,6 +293,17 @@ class TaskDial extends Task {
       this.transcribeTask.span.end();
       this.transcribeTask = null;
     }
+    if (this.llmTask) {
+      await this.llmTask.kill(cs);
+      this.llmTask.span.end();
+      this.llmTask = null;
+    }
+
+    if (this.sdLlmTask) {
+      await this.sdLlmTask.kill(cs);
+      this.sdLlmTask.span.end();
+      this.sdLlmTask = null;
+    }
     this.notifyTaskDone();
   }
 
@@ -758,6 +771,40 @@ class TaskDial extends Task {
     this._killOutdials(); // NB: order is important
   }
 
+  /**
+   * Start one LLM task per call leg to provide live translation.
+   * Does nothing when the dial verb has no `translate` property.
+   * @param {object} cs - passed through to each task's exec (presumably the call session)
+   * @param {object} sd - object whose `ep` the callee-side task is run on
+   */
+  async _startTranslate(cs, sd) {
+    if (this.translate) {
+      const {callerLanguage, calleeLanguage, gain} = this.translate;
+      assert(this.translate.llm, 'Dial:_startTranslate - missing llm in translate');
+      this.logger.debug('Dial:_startTranslate start llm services');
+      // setup caller LLM task
+      this.llmTask = makeTask(this.logger, {'llm': this.translate.llm});
+      this.llmTask.configureDialTranslate({
+        language: callerLanguage,
+        gain,
+      });
+
+      // setup callee LLM task
+      this.sdLlmTask = makeTask(this.logger, {'llm': this.translate.llm});
+      this.sdLlmTask.configureDialTranslate({
+        language: calleeLanguage,
+        gain,
+      });
+
+      // exec is not awaited here, so attach a rejection handler: a failure in
+      // either task is logged instead of becoming an unhandled rejection
+      this.llmTask.exec(cs, {ep: this.ep})
+        .catch((err) => this.logger.error({err}, 'Dial:_startTranslate caller llm task failed'));
+      this.sdLlmTask.exec(cs, {ep: sd.ep})
+        .catch((err) => this.logger.error({err}, 'Dial:_startTranslate callee llm task failed'));
+    }
+  }
+
   async _onReinvite(req, res) {
     try {
       let isHandled = false;
diff --git a/lib/tasks/llm/index.js b/lib/tasks/llm/index.js
index 
2491b687..70603a37 100644
--- a/lib/tasks/llm/index.js
+++ b/lib/tasks/llm/index.js
@@ -28,6 +28,16 @@ class TaskLlm extends Task {
   get ep() {
     return this.cs.ep;
   }
+
+  /**
+   * Pass dial-translate options through to the underlying llm implementation.
+   * @param {object} opts - forwarded unchanged to this.llm.configureDialTranslate
+   */
+  configureDialTranslate(opts) {
+    this.logger.debug(opts, 'TaskLlm:configureDialTranslate');
+    this.llm.configureDialTranslate(opts);
+  }
+
   async exec(cs, {ep}) {
     await super.exec(cs, {ep});
     await this.llm.exec(cs, {ep});
diff --git a/lib/tasks/llm/llms/elevenlabs_s2s.js b/lib/tasks/llm/llms/elevenlabs_s2s.js
index 38ae5a68..8607c8c4 100644
--- a/lib/tasks/llm/llms/elevenlabs_s2s.js
+++ b/lib/tasks/llm/llms/elevenlabs_s2s.js
@@ -2,6 +2,7 @@ const Task = require('../../task');
 const TaskName = 'Llm_Elevenlabs_s2s';
 const {LlmEvents_Elevenlabs} = require('../../../utils/constants');
 const {request} = require('undici');
+const { parseDecibels } = require('drachtio-fsmrf/lib/utils');
 const ClientEvent = 'client.event';
 const SessionDelete = 'session.delete';
 
@@ -73,6 +74,38 @@ class TaskLlmElevenlabs_S2S extends Task {
 
   get name() { return TaskName; }
 
+  /**
+   * Configure this session to translate one leg of a dial into `language`.
+   * Sets replace_read plus the two gain values that are appended to the
+   * session.create args, and overrides the agent prompt with a translation request.
+   * @param {object} opts
+   * @param {string} opts.language - language name interpolated into the prompt
+   * @param {object} [opts.gain] - optional {naturalVoice, translatedVoice}, each passed to parseDecibels
+   */
+  configureDialTranslate(opts) {
+    const {language, gain} = opts;
+    // nothing visible here guarantees gain is set; fall back to an empty object so a
+    // missing gain does not throw (NOTE(review): confirm parseDecibels(undefined) is acceptable)
+    const {naturalVoice, translatedVoice} = gain || {};
+    this.replace_read = true;
+    this.audio_in_gain = parseDecibels(naturalVoice);
+    this.audio_injection_gain = parseDecibels(translatedVoice);
+
+    // override the agent prompt to ask for a translation
+    this.conversation_initiation_client_data = {
+      ...(this.conversation_initiation_client_data || {}),
+      conversation_config_override: {
+        ...(this.conversation_initiation_client_data?.conversation_config_override || {}),
+        agent: {
+          prompt: {
+            prompt: `Please translate the text to ${language}.
+ Your response should only include the ${language} translation, without any additional words:\n\n`
+          }
+        }
+      }
+    };
+  }
+
   async getSignedUrl() {
     if (!this.api_key) {
       return {
@@ -164,7 +197,8 @@ class TaskLlmElevenlabs_S2S extends Task {
 
     try {
       const {host, path} = await this.getSignedUrl();
-      const args = [ep.uuid, 'session.create', this.input_sample_rate, this.output_sample_rate, host, path];
+      const args = [ep.uuid, 'session.create', this.input_sample_rate, this.output_sample_rate, host, path,
+        ...(this.replace_read ? ['replace_read', this.audio_in_gain, this.audio_injection_gain] : [])];
       await this._api(ep, args);
     } catch (err) {
       this.logger.error({err}, 'TaskLlmElevenlabs_S2S:_startListening');