From 6cff7637e647058bab868dbc1fd027207d394127 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 18 Jan 2025 08:56:20 +0100
Subject: [PATCH] chore(llama.cpp): update dependency

Update to '3edfa7d3753c29e44b964c0ff424d2ea8d5fdee6' and adapt to upstream changes

Signed-off-by: Ettore Di Giacinto
---
 Makefile                          |  2 +-
 backend/cpp/llama/grpc-server.cpp | 42 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 03468ffb0a8a..1f1ffb3e5716 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=4dbc8b9cb71876e005724f4e8f73a3544646bcf5
+CPPLLAMA_VERSION?=3edfa7d3753c29e44b964c0ff424d2ea8d5fdee6
 
 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp
index 4e75e7b07c90..9aeb34dbf5c9 100644
--- a/backend/cpp/llama/grpc-server.cpp
+++ b/backend/cpp/llama/grpc-server.cpp
@@ -134,6 +134,46 @@ static std::string tokens_to_output_formatted_string(const llama_context *ctx, c
     return out;
 }
 
+// Registers every RPC server listed in `servers` as a ggml backend device.
+//
+// `servers` is split on ','; each entry is handed, unmodified, as the endpoint
+// to the RPC backend's "ggml_backend_rpc_add_device" entry point. That entry
+// point is resolved by name through the backend registry rather than called
+// directly, and every device it returns is registered with
+// ggml_backend_device_register().
+//
+// Throws std::invalid_argument when the split yields no entries, the "RPC"
+// backend is not found, the add-device entry point cannot be resolved, or no
+// device is returned for an endpoint. Devices registered before a failing
+// endpoint are left registered.
+//
+// Added while adapting to the upstream llama.cpp changes in:
+// https://github.com/ggerganov/llama.cpp/compare/4dbc8b9cb71876e005724f4e8f73a3544646bcf5..3edfa7d3753c29e44b964c0ff424d2ea8d5fdee6
+static void add_rpc_devices(std::string servers) {
+    auto rpc_servers = string_split<std::string>(servers, ',');
+    if (rpc_servers.empty()) {
+        throw std::invalid_argument("no RPC servers specified");
+    }
+    ggml_backend_reg_t rpc_reg = ggml_backend_reg_by_name("RPC");
+    if (!rpc_reg) {
+        throw std::invalid_argument("failed to find RPC backend");
+    }
+    // Look the add-device function up by name on the RPC backend registration.
+    typedef ggml_backend_dev_t (*ggml_backend_rpc_add_device_t)(const char * endpoint);
+    ggml_backend_rpc_add_device_t ggml_backend_rpc_add_device_fn = (ggml_backend_rpc_add_device_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_device");
+    if (!ggml_backend_rpc_add_device_fn) {
+        throw std::invalid_argument("failed to find RPC device add function");
+    }
+    for (const auto & server : rpc_servers) {
+        ggml_backend_dev_t dev = ggml_backend_rpc_add_device_fn(server.c_str());
+        if (dev) {
+            ggml_backend_device_register(dev);
+        } else {
+            throw std::invalid_argument("failed to register RPC device");
+        }
+    }
+}
+
 // convert a vector of completion_token_output to json
 static json probs_vector_to_json(const llama_context *ctx, const std::vector<completion_token_output> &probs)
 {
@@ -2282,7 +2322,7 @@ static void params_parse(const backend::ModelOptions* request,
 
     const char *llama_grpc_servers = std::getenv("LLAMACPP_GRPC_SERVERS");
     if (llama_grpc_servers != NULL) {
-        params.rpc_servers = std::string(llama_grpc_servers);
+        add_rpc_devices(std::string(llama_grpc_servers));
     }
 
     // TODO: Add yarn