pollubnet · ktos · Jun 9, 2024 · Jun 9, 2024 · Jun 9, 2024
diff --git a/README.md b/README.md
diff --git a/Server/Fracture.Server.csproj b/Server/Fracture.Server.csproj
@@ -20,6 +20,7 @@
       <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
     </PackageReference>
     <PackageReference Include="Microsoft.EntityFrameworkCore.Sqlite" Version="8.0.1" />
+    <PackageReference Include="OpenAI-DotNet" Version="7.7.8" />
     <PackageReference Include="Swashbuckle.AspNetCore" Version="6.5.0" />
     <PackageReference Include="Microsoft.AspNetCore.Components.WebAssembly.Server" Version="8.0.1" />
   </ItemGroup>

diff --git a/Server/Modules/AI/Models/AIBackendConfiguration.cs b/Server/Modules/AI/Models/AIBackendConfiguration.cs
@@ -0,0 +1,23 @@
+namespace Fracture.Server.Modules.AI.Models;
+
+/// <summary>
+/// Configuration of the AI backend: endpoint URL, API key and default model.
+/// </summary>
+public class AIBackendConfiguration
+{
+    /// <summary>
+    /// The URL of the OpenAI-compatible endpoint. If null, the default
+    /// (official OpenAI) endpoint is used.
+    /// </summary>
+    public string? EndpointUrl { get; set; }
+
+    /// <summary>
+    /// The API key used when communicating with the AI backend.
+    /// </summary>
+    public string? ApiKey { get; set; }
+
+    /// <summary>
+    /// Name of the default model to use (e.g. "gpt-3.5-turbo" or "mistral").
+    /// </summary>
+    public required string Model { get; set; }
+}
diff --git a/Server/Modules/AI/Models/AIGenerationContext.cs b/Server/Modules/AI/Models/AIGenerationContext.cs
@@ -0,0 +1,51 @@
+using System.Diagnostics.CodeAnalysis;
+
+namespace Fracture.Server.Modules.AI.Models;
+
+/// <summary>
+/// The prompt and sampling parameters needed to generate an AI response.
+/// </summary>
+public class AIGenerationContext
+{
+    /// <summary>
+    /// The prompt to generate a response for.
+    /// </summary>
+    public required string Prompt { get; init; }
+
+    /// <summary>
+    /// The name of the model to use; if null, the configured default is used.
+    /// </summary>
+    public string? Model { get; init; }
+
+    /// <summary>
+    /// The stop tokens.
+    /// <para>Tokens that stop any further generation of the response,
+    /// typically used to limit the response, for example so that it does not
+    /// include the predicted next parts of the conversation.
+    /// </para>
+    /// </summary>
+    public string[]? StopTokens { get; init; }
+
+    /// <summary>
+    /// The maximum number of tokens to predict (defaults to 128).
+    /// <para>The number of predicted tokens plus the number of tokens in
+    /// the prompt must not exceed the model's context length.</para>
+    /// </summary>
+    public int? MaxTokens { get; set; } = 128;
+
+    /// <summary>
+    /// Temperature, controls the randomness of the model (defaults to 0.7).
+    /// <para>It should be between 0.0 and 2.0; higher values like 0.8 make
+    /// the output more random, while lower values (e.g. 0.2) make it
+    /// more deterministic.</para>
+    /// </summary>
+    public double Temperature { get; set; } = 0.7;
+
+    /// <summary>
+    /// Nucleus sampling, limits which tokens are considered by probability.
+    /// <para>0.1 means that only the tokens making up the top 10% of
+    /// probability mass are considered. Generally it is recommended to alter
+    /// this or temperature, but not both.</para>
+    /// </summary>
+    public double? TopP { get; set; }
+}
diff --git a/Server/Modules/AI/Services/IAIInstructionProvider.cs b/Server/Modules/AI/Services/IAIInstructionProvider.cs
@@ -0,0 +1,24 @@
+using Fracture.Server.Modules.AI.Models;
+
+namespace Fracture.Server.Modules.AI.Services;
+
+/// <summary>
+/// Provides the ability to generate a response to a prompt or instruction.
+/// </summary>
+public interface IAIInstructionProvider
+{
+    /// <summary>
+    /// Generates a response to an instruction (for instruction-following models).
+    /// </summary>
+    /// <param name="instruction">The actual instruction for the model to follow.</param>
+    /// <returns>The generated response to the given instruction.</returns>
+    Task<string> GenerateInstructionResponse(string instruction);
+
+    /// <summary>
+    /// Generates a response for a generation context, which includes the prompt
+    /// and additional generation parameters.
+    /// </summary>
+    /// <param name="context">Generation context with prompt and generation parameters</param>
+    /// <returns>The generated response for the given context.</returns>
+    Task<string> GenerateResponse(AIGenerationContext context);
+}
diff --git a/Server/Modules/AI/Services/OpenAICompatibleInstructionProvider.cs b/Server/Modules/AI/Services/OpenAICompatibleInstructionProvider.cs
@@ -0,0 +1,59 @@
+using Fracture.Server.Modules.AI.Models;
+using Microsoft.Extensions.Options;
+using OpenAI;
+using OpenAI.Chat;
+
+namespace Fracture.Server.Modules.AI.Services;
+
+/// <summary>
+/// <see cref="IAIInstructionProvider"/> that talks to an OpenAI-compatible chat completion API.
+/// </summary>
+public class OpenAICompatibleInstructionProvider : IAIInstructionProvider
+{
+    private readonly OpenAIClient _api;
+    private readonly AIBackendConfiguration _configuration;
+
+    /// <summary>
+    /// Builds the API client from the configured endpoint URL and API key.
+    /// </summary>
+    /// <param name="configuration">Options holding the endpoint URL, API key and default model.</param>
+    /// <exception cref="ArgumentException">No endpoint URL is set and the API key is null or empty.</exception>
+    public OpenAICompatibleInstructionProvider(IOptions<AIBackendConfiguration> configuration)
+    {
+        _configuration = configuration.Value;
+
+        if (_configuration.EndpointUrl is null)
+        {
+            // Validate the value itself; nameof(...) alone only checked the literal "ApiKey".
+            ArgumentException.ThrowIfNullOrEmpty(_configuration.ApiKey, nameof(_configuration.ApiKey));
+            _api = new OpenAIClient(new OpenAIAuthentication(_configuration.ApiKey));
+            return;
+        }
+
+        // Custom endpoint: authentication is passed only when a key is configured.
+        var settings = new OpenAIClientSettings(domain: _configuration.EndpointUrl);
+        _api = _configuration.ApiKey is null
+            ? new OpenAIClient(clientSettings: settings)
+            : new OpenAIClient(new OpenAIAuthentication(_configuration.ApiKey), settings);
+    }
+
+    /// <inheritdoc/>
+    public async Task<string> GenerateInstructionResponse(string instruction) =>
+        await GenerateResponse(new() { Prompt = instruction });
+
+    /// <inheritdoc/>
+    public async Task<string> GenerateResponse(AIGenerationContext context)
+    {
+        var messages = new List<Message> { new Message(Role.User, context.Prompt) };
+        var chatRequest = new ChatRequest(
+            messages,
+            model: context.Model ?? _configuration.Model,
+            temperature: context.Temperature,
+            stops: context.StopTokens,
+            maxTokens: context.MaxTokens,
+            topP: context.TopP
+        );
+        var response = await _api.ChatEndpoint.GetCompletionAsync(chatRequest);
+        return response.FirstChoice;
+    }
+}
diff --git a/Server/Program.cs b/Server/Program.cs
@@ -1,4 +1,6 @@
 using Fracture.Server.Components;
+using Fracture.Server.Modules.AI.Models;
+using Fracture.Server.Modules.AI.Services;
 using Fracture.Server.Modules.Database;
 using Fracture.Server.Modules.Items.Models;
 using Fracture.Server.Modules.Items.Services;
@@ -12,11 +14,14 @@
 
 // Add services to the container.
 builder.Services.Configure<NameGeneratorConfig>(builder.Configuration.GetSection("NameGenerator"));
+builder.Services.Configure<AIBackendConfiguration>(builder.Configuration.GetSection("AiBackend"));
 
 builder.Services.AddSingleton<INameGenerator, MarkovNameGenerator>();
 builder.Services.AddSingleton<IItemGenerator, ItemGenerator>();
 builder.Services.AddSingleton<PrefixesGenerator>();
 builder.Services.AddSingleton<VersionInfoProvider>();
+builder.Services.AddSingleton<IAIInstructionProvider, OpenAICompatibleInstructionProvider>();
+
 builder.Services.AddScoped<IUsersRepository, UsersRepository>();
 builder.Services.AddScoped<IItemsRepository, ItemsRepository>();
 

diff --git a/docs/ai.md b/docs/ai.md
@@ -1,84 +1,47 @@
 # Backend AI
 
 Projekt komunikuje się z modelem językowym na backendzie, w tym momencie
-dostosowany jest do serwera REST uruchamianego przez
-[llama.cpp](https://github.com/ggerganov/llama.cpp) (aplikacja `server`), oraz
-testowana była na modelu Mistral-7B i jego pochodnych, w szczególności
-[Mistral-RP-0.1-7B-GGUF](https://huggingface.co/Undi95/Mistral-RP-0.1-7B-GGUF?not-for-all-audiences=true).
+dostosowany jest do serwerów, które oferują API podobne do tego, które oferuje
+OpenAI, więc można wykorzystać zarówno oficjalny system OpenAI, jak i produkty w
+rodzaju vLLM, serwera llama.cpp lub [ollama](https://ollama.com/).
 
-Aby uruchomić serwer modelu językowego, należy pobrać (lub skompilować)
-llama.cpp, pobrać plik modelu z serwera huggingface.co i uruchomić serwer
-wydając polecenie, na przykład:
+## ollama
 
-```bash
-./server -m <plik modelu> -ngl 35 --host 127.0.0.1
-```
-
-(tutaj następuje przeniesienie 35 warstw modelu na urządzenie CUDA, wymagana
-jest odmiana llama.cpp z obsługą CUDA, w przeciwnym wypadku parametr `-ngl` nie
-jest dostępny)
-
-Pełna dokumentacja serwera:
-<https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md>
-
-## docker
-
-Można też uruchomić Backend AI llama.cpp w oparciu o plik
-`docker-compose-;lamacpp.yml`. W tym celu należy w pliku `.env`, w którym
-znajduje się konfiguracja ustawień lokalnej bazy danych dodać kolejne dwie
-zmienne definiujące wariant serwera oraz ścieżkę lokalną do pliku modelu, np.:
-
-```env
-LLAMA_VARIANT=full
-MODEL_PATH=U:\ml\krakowiak-7b.gguf.q4_k_m.bin
-```
-
-Dostępne warianty serwera to: `full` (CPU), `full-cuda` (NVIDIA GPU) i
-`full-rocm` (AMD ROCm GPU).
-
-Teraz, zamiast wydawać polecenie `docker compose up` tak jak zwykle, możesz
-wydać komendę:
-
-```sh
-docker compose -f docker-compose-llamacpp.yml up
-```
-
-Co uruchomi zarówno serwer modelu językowego, jak i bazy danych niezbędne
-aplikacji głównej.
+[Ollama](https://ollama.com/) to najłatwiejszy sposób na uruchomienie modelu na
+własnym komputerze - wystarczy zainstalować oprogramowanie, wydać komendę
+`ollama pull mistral` i `ollama serve` aby uruchomić kompatybilny z OpenAI
+serwer na własnym komputerze, który będzie mógł uruchamiać popularny i mało
+wymagający model Mistral-7B. Ollama dostępna jest zarówno dla Windows, Linuksa,
+jak i macOS i dostosowana jest zarówno do wykorzystania CUDA na kartach NVIDII,
+jak i ROCm na kartach graficznych AMD (lub pozwala również skorzystać tylko z
+procesora).
 
 ## Konfiguracja backendu AI w aplikacji
 
-Konfiguracja backendu AI jest oparta o plik sekretów (`secrets.json`).
+Konfiguracja backendu AI jest oparta o plik sekretów (`secrets.json`), aby klucz
+API (potencjalnie bardzo wrażliwa informacja) nie "wyciekł", a także, aby każdy
+programista mógł korzystać z własnej konfiguracji AI.
+
 [Przeczytaj
 dokumentację.](https://learn.microsoft.com/en-us/aspnet/core/security/app-secrets?view=aspnetcore-8.0&tabs=linux).
 
-Sekrety aplikacji definiują URL serwera llama.cpp, na przykład:
+Sekrety aplikacji definiują URL serwera i domyślny model, na przykład:
 
 ```json
-"AiEndpoint": {
-    "EndpointUrl": "http://127.0.0.1:8080/completion"
+"AiBackend": {
+    "EndpointUrl": "http://127.0.0.1:11434",
+    "Model": "mistral"
 }
 ```
 
 Opcjonalnie można również dostarczyć klucz API wykorzystywany do komunikacji:
 
 ```json
-{
-  "AiEndpoint": {
-    "EndpointUrl": "http://127.0.0.1:8080/completion",
-    "ApiKey": "this-is-secret"
-  }
+"AiBackend": {
+    "EndpointUrl": "http://127.0.0.1:11434",
+    "Model": "mistral",
+    "ApiKey": "this is a secret"
 }
 ```
 
 URL i klucz API serwera uczelnianego są na Discordzie.
-
-Niezbędne jest również wybranie modułów odpowiedzialnych za komunikację z
-backendem AI i przygotowaniem promptów, które to należy wybrać jako pełne nazwy
-typów, włącznie z ich _assembly_, dla Mistral-7B należy wybrać
-`AlpacaPromptProvider`:
-
-```json
-"AiBackendProvider": "Fracture.Shared.External.Providers.Ai.LlamaCpp.LlamaCppBackendProvider, Fracture.Shared.External, Version=1.0.0.0, Culture=neutral, PublicKeyToken=null",
-"AiPromptTemplateProvider": "Fracture.Shared.External.Providers.Ai.AlpacaPromptProvider, Fracture.Shared.External, Version=1.0.0.0, Culture=neutral, PublicKeyToken=null",
-```