Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add smarties speech participle #109

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/SmartTalk.Core/Services/Http/Clients/SmartiesClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using SmartTalk.Core.Ioc;
using Smarties.Messages.Requests.Ask;
using SmartTalk.Core.Settings.Smarties;
using SmartTalk.Messages.Commands.Smarties;
using SmartTalk.Messages.Dto.Smarties;

namespace SmartTalk.Core.Services.Http.Clients;
Expand All @@ -11,6 +12,8 @@ public interface ISmartiesClient : IScopedDependency
Task<AskGptResponse> PerformQueryAsync(AskGptRequest request, CancellationToken cancellationToken);

Task<AskGptEmbeddingResponseDto> GetEmbeddingAsync(AskGptEmbeddingRequestDto request, CancellationToken cancellationToken);

/// <summary>
/// Sends <paramref name="command"/> to the Smarties speech participle endpoint and returns its response.
/// The SmartiesClient implementation posts the command as JSON to "{BaseUrl}/api/Translation/participle".
/// </summary>
/// <param name="command">Request payload to post.</param>
/// <param name="cancellationToken">Token passed through to the HTTP call.</param>
/// <returns>The response body as a <see cref="SpeechParticipleResponseDto"/>.</returns>
Task<SpeechParticipleResponseDto> SpeechParticipleAsync(SpeechParticipleCommandDto command, CancellationToken cancellationToken);
}

public class SmartiesClient : ISmartiesClient
Expand Down Expand Up @@ -46,4 +49,13 @@ public async Task<AskGptEmbeddingResponseDto> GetEmbeddingAsync(AskGptEmbeddingR

return response;
}

/// <summary>
/// Posts <paramref name="command"/> as JSON to the Smarties "api/Translation/participle" endpoint,
/// logs the response and hands it back to the caller.
/// </summary>
/// <param name="command">Request payload to post.</param>
/// <param name="cancellationToken">Token passed through to the HTTP call.</param>
/// <returns>The response produced by the HTTP client factory for this request.</returns>
public async Task<SpeechParticipleResponseDto> SpeechParticipleAsync(SpeechParticipleCommandDto command, CancellationToken cancellationToken)
{
    var endpoint = $"{_smartiesSettings.BaseUrl}/api/Translation/participle";

    var participle = await _httpClientFactory
        .PostAsJsonAsync<SpeechParticipleResponseDto>(endpoint, command, cancellationToken, headers: _headers)
        .ConfigureAwait(false);

    Log.Information("Speech participle response: {@Response}", participle);

    return participle;
}
}
107 changes: 100 additions & 7 deletions src/SmartTalk.Core/Services/PhoneOrder/PhoneOrderService.Record.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
using System.Net;
using Serilog;
using System.Text;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using SmartTalk.Messages.Enums.STT;
using Smarties.Messages.DTO.OpenAi;
Expand All @@ -10,9 +8,6 @@
using Smarties.Messages.Requests.Ask;
using System.Text.RegularExpressions;
using SmartTalk.Core.Domain.PhoneOrder;
using SmartTalk.Core.Domain.SpeechMatics;
using SmartTalk.Core.Extensions;
using SmartTalk.Messages.Dto.WebSocket;
using SmartTalk.Messages.Dto.PhoneOrder;
using SmartTalk.Messages.Dto.Attachments;
using SmartTalk.Messages.Enums.PhoneOrder;
Expand All @@ -21,9 +16,8 @@
using SmartTalk.Messages.Commands.PhoneOrder;
using SmartTalk.Messages.Requests.PhoneOrder;
using SmartTalk.Messages.Commands.Attachments;
using SmartTalk.Messages.Constants;
using SmartTalk.Messages.Commands.Smarties;
using SmartTalk.Messages.Dto.EasyPos;
using SmartTalk.Messages.Dto.Restaurant;
using SmartTalk.Messages.Dto.WeChat;
using TranscriptionFileType = SmartTalk.Messages.Enums.STT.TranscriptionFileType;
using TranscriptionResponseFormat = SmartTalk.Messages.Enums.STT.TranscriptionResponseFormat;
Expand Down Expand Up @@ -63,6 +57,17 @@ public async Task ReceivePhoneOrderRecordAsync(ReceivePhoneOrderRecordCommand co

if (await CheckOrderExistAsync(recordInfo.OrderDate.AddHours(-8), cancellationToken).ConfigureAwait(false)) return;

try
{
await SpeechParticipleAsync(command.RecordName, command.RecordContent, recordInfo, cancellationToken).ConfigureAwait(false);

return;
}
catch (Exception ex)
{
Log.Error(ex, "Phone order record SpeechParticipleAsync error");
}

var transcription = await _speechToTextService.SpeechToTextAsync(
command.RecordContent, fileType: TranscriptionFileType.Wav, responseFormat: TranscriptionResponseFormat.Text, cancellationToken: cancellationToken).ConfigureAwait(false);

Expand Down Expand Up @@ -94,6 +99,94 @@ public async Task ReceivePhoneOrderRecordAsync(ReceivePhoneOrderRecordCommand co

await AddPhoneOrderRecordAsync(record, PhoneOrderRecordStatus.Diarization, cancellationToken).ConfigureAwait(false);
}

/// <summary>
/// Handles a phone order recording via the Smarties speech participle endpoint:
/// creates the record, uploads the audio, requests the segmented transcript, structures it into
/// conversations, extracts the shopping cart and finally stores the record as <c>Sent</c>.
/// </summary>
/// <param name="recordName">File name used when uploading the recording.</param>
/// <param name="recordContent">Raw bytes of the recording.</param>
/// <param name="recordInfo">Restaurant and order date parsed for this recording.</param>
/// <param name="cancellationToken">Token passed through to every awaited call.</param>
/// <remarks>
/// NOTE(review): the record is persisted with status <c>Diarization</c> before the remote call. If a later
/// step throws, the caller logs the error and continues with its speech-to-text flow, which may persist a
/// second record for the same recording — confirm whether that is intended.
/// </remarks>
private async Task SpeechParticipleAsync(string recordName, byte[] recordContent, PhoneOrderRecordInformationDto recordInfo, CancellationToken cancellationToken)
{
    var record = new PhoneOrderRecord
    {
        SessionId = Guid.NewGuid().ToString(),
        Restaurant = recordInfo.Restaurant,
        Language = TranscriptionLanguage.English,
        // Same -8h shift the caller applies before its duplicate check; presumably a time-zone adjustment — TODO confirm.
        CreatedDate = recordInfo.OrderDate.AddHours(-8),
        Status = PhoneOrderRecordStatus.Recieved
    };

    // A positive duration check stores the record as NoContent and stops processing.
    if (await CheckPhoneOrderRecordDurationAsync(recordContent, cancellationToken).ConfigureAwait(false))
    {
        await AddPhoneOrderRecordAsync(record, PhoneOrderRecordStatus.NoContent, cancellationToken).ConfigureAwait(false);

        return;
    }

    var url = await UploadRecordFileAsync(recordName, recordContent, cancellationToken).ConfigureAwait(false);

    // Constant message template: the URL is passed as a property, never as part of the template,
    // so braces inside the URL cannot be misread as template tokens. ":l" keeps the rendered text unquoted.
    Log.Information("Phone order record file url: {Url:l}", url);

    // Without an uploaded file there is nothing for the remote service to fetch.
    if (string.IsNullOrEmpty(url))
    {
        await AddPhoneOrderRecordAsync(record, PhoneOrderRecordStatus.NoContent, cancellationToken).ConfigureAwait(false);

        return;
    }

    await AddPhoneOrderRecordAsync(record, PhoneOrderRecordStatus.Diarization, cancellationToken).ConfigureAwait(false);

    var speech = await _smartiesClient.SpeechParticipleAsync(new SpeechParticipleCommandDto
    {
        Url = url,
    }, cancellationToken).ConfigureAwait(false);

    var (goalText, tip) = await StructureDiarizationResults(speech, record, cancellationToken).ConfigureAwait(false);

    await _phoneOrderUtilService.ExtractPhoneOrderShoppingCartAsync(goalText, record, cancellationToken).ConfigureAwait(false);

    record.Tips = tip;
    record.Status = PhoneOrderRecordStatus.Sent;
    record.TranscriptionText = goalText;

    await _phoneOrderDataProvider.UpdatePhoneOrderRecordsAsync(record, cancellationToken: cancellationToken).ConfigureAwait(false);
}

/// <summary>
/// Converts the segments of a speech participle response into question/answer conversations for
/// <paramref name="record"/>, persists them and returns the processed transcript text together with
/// the question of the first conversation.
/// </summary>
/// <param name="results">Response whose <c>Result</c> entries are walked in enumeration order.</param>
/// <param name="record">Record the conversations belong to; its <c>Id</c> is copied onto each conversation.</param>
/// <param name="cancellationToken">Token passed through to every awaited call.</param>
/// <returns>A tuple of (processed conversation text, question of the first conversation).</returns>
/// <exception cref="InvalidOperationException">The response carries no result segments.</exception>
/// <exception cref="FormatException">A segment key is not an integer.</exception>
private async Task<(string, string)> StructureDiarizationResults(SpeechParticipleResponseDto results, PhoneOrderRecord record, CancellationToken cancellationToken)
{
    // Fail with a clear message instead of a NullReferenceException / out-of-range access further down.
    if (results?.Result == null || results.Result.Count == 0)
        throw new InvalidOperationException("Speech participle response contains no result segments");

    var goalTexts = "";
    var conversationIndex = 0;
    var conversations = new List<PhoneOrderConversation>();

    foreach (var speech in results.Result)
    {
        var originText = speech.Value.Text;

        // Even keys are attributed to the restaurant, odd keys to the client.
        // Keys are machine-generated, so parse them culture-independently.
        var segmentNumber = int.Parse(speech.Key, System.Globalization.CultureInfo.InvariantCulture);
        var currentRole = segmentNumber % 2 == 0 ? PhoneOrderRole.Restaurant : PhoneOrderRole.Client;

        goalTexts += currentRole + ": " + originText + "\n";

        if (currentRole == PhoneOrderRole.Restaurant)
        {
            conversations.Add(new PhoneOrderConversation { RecordId = record.Id, Question = originText, Order = conversationIndex });

            continue;
        }

        if (conversationIndex >= conversations.Count)
        {
            // Client segment with no pending restaurant question (e.g. the client speaks first):
            // record it against an empty question instead of indexing past the end of the list.
            conversations.Add(new PhoneOrderConversation { RecordId = record.Id, Question = "", Answer = originText, Order = conversationIndex });
        }
        else
        {
            conversations[conversationIndex].Answer = originText;
        }

        conversationIndex++;
    }

    Log.Information("Structure diarization goalTextsString : {@speakInfos}, conversations:{@conversations}", goalTexts, conversations);

    if (await CheckRestaurantRecordingRoleAsync(goalTexts, cancellationToken).ConfigureAwait(false))
    {
        // Ensure the list starts with an empty placeholder entry before shifting.
        if (conversations[0].Question.IsNullOrEmpty())
        {
            conversations.Insert(0, new PhoneOrderConversation
            {
                Order = 0,
                Answer = "",
                Question = "",
                RecordId = record.Id
            });
        }

        ShiftConversations(conversations);
    }

    // The raw role-prefixed text is only used for logging and the role check above;
    // the returned text is rebuilt from the final conversation list.
    goalTexts = ProcessConversation(conversations);

    await _phoneOrderDataProvider.AddPhoneOrderConversationsAsync(conversations, true, cancellationToken).ConfigureAwait(false);

    return (goalTexts, conversations.First().Question);
}

private async Task<bool> CheckOrderExistAsync(DateTimeOffset createdDate, CancellationToken cancellationToken)
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
using Newtonsoft.Json;
using Smarties.Messages.Responses;

namespace SmartTalk.Messages.Commands.Smarties;

/// <summary>
/// Request payload for the Smarties speech participle call.
/// </summary>
public class SpeechParticipleCommandDto
{
/// <summary>Location of the recording to process; the phone order flow passes the URL of the uploaded record file.</summary>
public string Url { get; set; }
}

/// <summary>
/// <see cref="SmartiesResponse{T}"/> specialised for <see cref="SpeechParticipleResponseDto"/>.
/// Declares no members of its own.
/// </summary>
public class SpeechParticipleResponse : SmartiesResponse<SpeechParticipleResponseDto>
{
}

/// <summary>
/// Body returned by the Smarties speech participle endpoint, mapped from the
/// "status", "message" and "result" JSON fields.
/// </summary>
public class SpeechParticipleResponseDto
{
/// <summary>Value of the "status" JSON field. The meaning of individual codes is not defined here.</summary>
[JsonProperty("status")]
public int Status { get; set; }

/// <summary>Value of the "message" JSON field.</summary>
[JsonProperty("message")]
public string Message { get; set; }

/// <summary>
/// Value of the "result" JSON field: speech segments keyed by a string.
/// The phone order flow parses each key as an integer and uses its parity to pick the speaker role.
/// </summary>
[JsonProperty("result")]
public Dictionary<string, SpeechParticipleResultDto> Result { get; set; }
}

/// <summary>
/// One speech segment of a speech participle result, mapped from the
/// "start_time", "end_time" and "text" JSON fields.
/// </summary>
public class SpeechParticipleResultDto
{
/// <summary>Value of the "start_time" JSON field, kept as a string. Format not specified here — TODO confirm.</summary>
[JsonProperty("start_time")]
public string StartTime { get; set; }

/// <summary>Value of the "end_time" JSON field, kept as a string. Format not specified here — TODO confirm.</summary>
[JsonProperty("end_time")]
public string EndTime { get; set; }

/// <summary>Value of the "text" JSON field; used as the question or answer text of a conversation.</summary>
[JsonProperty("text")]
public string Text { get; set; }
}